xref: /minix/minix/net/lwip/ifaddr.c (revision e4dbab1e)
1 /* LWIP service - ifaddr.c - network interface address management */
2 /*
3  * This module is an exception to the regular source organization of this
4  * service, in that it manages part of another module's data structures, namely
5  * ifdev.  As such, it should be seen as logically part of ifdev.  It is
6  * separated only to keep the source code more manageable.  Still, this module
7  * may use direct access only on the address-related fields of the ifdev
 * structure, so that those one day may be moved into an ifaddr-specific
9  * substructure within ifdev.
10  */
11 /*
12  * We manage three types of addresses here: IPv4 addresses (ifaddr_v4),
13  * IPv6 addresses (ifaddr_v6), and link-layer a.k.a. MAC addresses (ifaddr_dl).
14  *
15  * Managing IPv4 addresses is easy.  lwIP supports only one IPv4 address per
16  * netif.  While it would be possible to construct a model where one ifdev
 * consists of multiple netifs (with one IPv4 address each), we do not support
18  * this--mostly because it is a pain to keep state synchronized between the
19  * netifs in that case.  Such support can still be added later; the IPv4 API
20  * exposed from here does support multiple IPv4 addresses already just in case,
21  * as does much of the code using the API.
22  *
23  * For IPv4 addresses we maintain only one extra piece of information here,
24  * which is whether an IPv4 address has been set at all.  This is because for
25  * our userland (DHCP clients in particular), we must allow assigning 0.0.0.0
26  * as address to an interface.  We do not use the lwIP per-netif IPv4 gateway
27  * field, nor the concept of a "default netif", in both cases because we
28  * override all (routing) decisions that would use those settings.  lwIP does
29  * not allow a broadcast address to be set, so support for broadcast addresses
30  * is botched here: we disregard custom broadcast addresses given to us, and
31  * instead expose the broadcast address that is used within lwIP.
32  *
33  * Managing IPv6 addresses is much more complicated.  First of all, even though
34  * lwIP supports stateless address autoconfiguration (SLAAC) as per RFC 4862,
35  * we disable that and instead make dhcpcd(8) responsible for all IPv6 address
36  * configuration.  dhcpcd(8) will set addresses and routes as necessary, the
37  * latter of which are used in lwIP through our routing hooks (in the route
38  * module).  This approach, which is in line with where NetBSD is headed,
39  * allows us to work around a number of lwIP limitations.  As a result we do
40  * differ in this respect from NetBSD, which may switch between kernel-only,
 * dhcpcd-only, and hybrid autoconfiguration, mainly through the accept_rtadv
42  * sysctl(7) node.  Writing to this node has no real effect on MINIX 3.
43  *
44  * All IPv6 addresses have a prefix length, which is almost but not quite the
45  * same as IPv4's subnet masks (see RFC 5942).  We must maintain the per-
46  * address prefix length ourselves, as lwIP supports IPv6 prefix lengths of 64
47  * bits only.  Our dhcpcd(8)-based approach allows us to work around that.
48  *
49  * All IPv6 addresses also have a state and a lifetime, both of which are
50  * managed by lwIP.  Unlike for IPv4, address-derived routes and routing socket
51  * messages are only created for addresses that are "valid", which means that
52  * they are in either PREFERRED or DEPRECATED state.  This means that we have
53  * to be aware of all address state transitions between "valid" and "not
54  * valid", some of which (namely address duplication detection and lifetime
55  * expirations) are initiated by lwIP.  As such, we need to keep shadow state
56  * for each address, and use a callback to detect whether state has changed.
57  *
58  * For understanding of this module as well as lwIP, it is important to note
59  * that "valid" is not the opposite of "invalid" in this context: "not valid"
60  * includes the address states INVALID, DUPLICATED, and TENTATIVE, while
61  * "invalid"/INVALID simply means that the address slot is free.
62  *
63  * Each IPv6 address also has associated flags.  We support an AUTOCONF flag
64  * which indicates that no subnet route should be added for the address; on
65  * MINIX 3, dhcpcd(8) is modified to pass in that flag when appropriate, thus
66  * solving a problem that NetBSD suffers from, namely that it does not know
67  * whether a userland-given route is static (implying a subnet) or auto-
68  * configured (implying no subnet, again as per RFC 5942), leading to it doing
69  * the wrong thing in dhcpcd-only autoconfiguration mode.  The TEMPORARY flag,
70  * for privacy addresses (RFC 4941) should be the same as on NetBSD; it is
71  * currently used only in source address selection (RFC 6724).  We override
72  * lwIP's IPv6 source address selection algorithm to include support for not
73  * just this flag, but also label and proper longest-common-prefix comparisons.
74  * Finally, there is an HWBASED flag to make sure that when the link-layer
75  * address is changed, the IPv6 link-local address is changed accordingly only
76  * if the previous link-local address was also autogenerated from a link-layer
77  * address and not set manually by userland.
78  *
79  * Finally, we support multiple link-layer addresses per interface, but only
80  * because NetBSD's ifconfig(8) uses an API that expects such multi-address
81  * support.  At any time, only one of the addresses is marked as "active",
82  * which means it is used as MAC address in outgoing packets.  We support only
83  * one MAC address per device driver, so the support for additional, inactive
84  * link-layer addresses is there exclusively for ifconfig(8) interoperability.
85  *
86  * All interfaces, including those that do not have MAC addresses at all (e.g.,
87  * loopback interfaces), do have one link-layer address.  This is expected in
88  * particular by getifaddrs(3), which only recognizes interfaces that have a
89  * link-layer address.
90  *
91  * Many features are still missing here, especially for IP addresses.  For
 * example, we do not support destination addresses at all yet, simply
93  * because there is no interface type that uses them.  For IPv6, more work is
94  * to be done to support proper netif status transitions versus address states,
95  * fallout from address duplication, and various ND6_IFF_ flags.
96  */
97 
98 #include "lwip.h"
99 #include "rtsock.h"
100 #include "route.h"
101 
102 #include "lwip/etharp.h"
103 
104 #include <netinet6/in6_var.h>
105 #include <netinet6/nd6.h>
106 
/*
 * Routing flags for local address and local network routing entries.  This
 * may later have to be refined, for example in order not to set RTF_CLONING
 * for routes on interfaces that do not have link-layer addressing.
 *
 * IMPORTANT: as of NetBSD 8, RTF_CLONING has been renamed to RTF_CONNECTED.
 */
/* Flags of the host route that is added for a locally assigned address. */
#define IFADDR_HOST_RTFLAGS	(RTF_UP | RTF_HOST | RTF_LOCAL)
/* Flags of the subnet route that is derived from a local address. */
#define IFADDR_NET_RTFLAGS	(RTF_UP | RTF_CLONING)

/*
 * Address-related sysctl(7) settings.  Both are plain integers with external
 * linkage so that they can be read and written from outside this module.
 */
int ifaddr_auto_linklocal = 1;	/* different from NetBSD, see its usage */
int ifaddr_accept_rtadv = 0;	/* settable but completely disregarded */
120 
121 /*
122  * Initialize the local address administration for an interface that is in the
123  * process of being created.
124  */
125 void
126 ifaddr_init(struct ifdev * ifdev)
127 {
128 	unsigned int i;
129 
130 	ifdev->ifdev_v4set = FALSE;
131 
132 	for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; i++)
133 		ifdev->ifdev_v6state[i] = IP6_ADDR_INVALID;
134 
135 	for (i = 0; i < __arraycount(ifdev->ifdev_hwlist); i++)
136 		ifdev->ifdev_hwlist[i].ifhwa_flags = 0;
137 }
138 
139 /*
140  * Find an IPv4 address locally assigned to a interface.  The IPv4 address is
141  * given as 'addr'.  The interface is given as 'ifdev'.  On success, return OK,
142  * with the IPv4 address number stored in 'num'.  On failure, return a negative
143  * error code.
144  */
145 int
146 ifaddr_v4_find(struct ifdev * ifdev, const struct sockaddr_in * addr,
147 	ifaddr_v4_num_t * num)
148 {
149 	ip_addr_t ipaddr;
150 	int r;
151 
152 	if ((r = addr_get_inet((const struct sockaddr *)addr, sizeof(*addr),
153 	    IPADDR_TYPE_V4, &ipaddr, TRUE /*kame*/, NULL /*port*/)) != OK)
154 		return r;
155 
156 	if (!ifdev->ifdev_v4set ||
157 	    !ip_addr_cmp(netif_ip_addr4(ifdev_get_netif(ifdev)), &ipaddr))
158 		return EADDRNOTAVAIL;
159 
160 	*num = 0;
161 	return OK;
162 }
163 
164 /*
165  * Enumerate IPv4 addresses locally assigned to the given interface 'ifdev'.
166  * The caller should set 'nump' to 0 initially, and increase it by one between
167  * a successful call and the next enumeration call.  Return TRUE on success,
168  * meaning that starting from the given value of 'nump' there is at least one
169  * IPv4 address, of which the number is stored in 'nump' on return.  Return
170  * FALSE if there are no more IPv4 addresses locally assigned to the interface.
171  */
172 int
173 ifaddr_v4_enum(struct ifdev * ifdev, ifaddr_v4_num_t * num)
174 {
175 
176 	/*
177 	 * For now, we support only up to one IPv4 address per interface.
178 	 * set if we are to return it.
179 	 */
180 	return (*num == 0 && ifdev->ifdev_v4set);
181 }
182 
183 /*
184  * Obtain information about the IPv4 address 'num' assigned to the interface
185  * 'ifdev'.  On success, return OK, with the IPv4 address stored in 'addr', the
186  * network mask stored in 'mask', the broadcast stored in 'bcast', and the
187  * destination address stored in 'dest'.  Each of these pointers may be NULL.
188  * The interface may not have a broadcast and/or destination address; in that
189  * case, their corresponding structures are not filled in at all, and thus must
190  * be preinitialized by the caller to a default state.  The reason for not
191  * zeroing them is that some callers use the same buffer for both.  On failure,
192  * return a negative error code.
193  */
194 int
195 ifaddr_v4_get(struct ifdev * ifdev, ifaddr_v4_num_t num,
196 	struct sockaddr_in * addr, struct sockaddr_in * mask,
197 	struct sockaddr_in * bcast, struct sockaddr_in * dest)
198 {
199 	const ip_addr_t *ipaddr, *netmask;
200 	struct netif *netif;
201 	ip_addr_t broad;
202 	socklen_t addr_len;
203 
204 	if (!ifaddr_v4_enum(ifdev, &num))
205 		return EADDRNOTAVAIL;
206 
207 	netif = ifdev_get_netif(ifdev);
208 
209 	if (addr != NULL) {
210 		addr_len = sizeof(*addr);
211 
212 		addr_put_inet((struct sockaddr *)addr, &addr_len,
213 		    netif_ip_addr4(netif), TRUE /*kame*/, 0 /*port*/);
214 	}
215 
216 	if (mask != NULL) {
217 		addr_len = sizeof(*mask);
218 
219 		/*
220 		 * Do not bother using addr_put_netmask() here, as we would
221 		 * then first have to compute the prefix length..
222 		 */
223 		addr_put_inet((struct sockaddr *)mask, &addr_len,
224 		    netif_ip_netmask4(netif), TRUE /*kame*/, 0 /*port*/);
225 	}
226 
227 	if (bcast != NULL) {
228 		if (netif->flags & NETIF_FLAG_BROADCAST) {
229 			/* Fake a broadcast address. */
230 			ipaddr = netif_ip_addr4(netif);
231 			netmask = netif_ip_netmask4(netif);
232 
233 			ip_addr_set_ip4_u32(&broad,
234 			    ip_addr_get_ip4_u32(ipaddr) |
235 			    ~ip_addr_get_ip4_u32(netmask));
236 
237 			addr_len = sizeof(*bcast);
238 
239 			addr_put_inet((struct sockaddr *)bcast, &addr_len,
240 			    &broad, TRUE /*kame*/, 0 /*port*/);
241 		} else {
242 			bcast->sin_len = 0;
243 			bcast->sin_family = AF_UNSPEC;
244 		}
245 	}
246 
247 	if (dest != NULL) {
248 		/* TODO: dest */
249 		dest->sin_len = 0;
250 		dest->sin_family = AF_UNSPEC;
251 	}
252 
253 	return OK;
254 }
255 
256 /*
257  * Obtain NetBSD-style state flags (IN_IFF_) for the given local IPv4 address.
258  * The given number must identify an existing address.  Return the flags.
259  */
260 int
261 ifaddr_v4_get_flags(struct ifdev * ifdev, ifaddr_v4_num_t num)
262 {
263 
264 	/* IPv4 per-address flags are not supported yet. */
265 	return 0;
266 }
267 
268 /*
269  * Determine whether there should be a local subnet route for the given
270  * assigned IPv4 address, and if so, compute the subnet mask to add.  Return
271  * TRUE if a local subnet route should be added, and return the network base
272  * address in 'netbase' and the number of prefix bits in 'prefixp'.  Return
273  * FALSE if no subnet route should be added for the assigned address.
274  */
275 static unsigned int
276 ifaddr_v4_netroute(struct ifdev * ifdev, ifaddr_v4_num_t num,
277 	ip_addr_t * netbase, unsigned int * prefixp)
278 {
279 	const ip_addr_t *ipaddr, *netmask;
280 	unsigned int prefix;
281 	uint32_t val;
282 
283 	/* Do not add subnet masks for loopback interfaces. */
284 	if (ifdev_is_loopback(ifdev))
285 		return FALSE;
286 
287 	assert(num == 0);
288 	assert(ifdev->ifdev_v4set);
289 
290 	ipaddr = netif_ip_addr4(ifdev_get_netif(ifdev));
291 	netmask = netif_ip_netmask4(ifdev_get_netif(ifdev));
292 
293 	/*
294 	 * If the subnet is a /32, skip adding a local host route: not only
295 	 * would it not be useful, it would fail anyway because we currently do
296 	 * not support adding a host-type route and a full-width net-type route
297 	 * for the same IP address.
298 	 */
299 	if (ip_addr_get_ip4_u32(netmask) == PP_HTONL(0xffffffffUL))
300 		return FALSE;
301 
302 	/* Compute the network base address. */
303 	ip_addr_set_ip4_u32(netbase,
304 	    ip_addr_get_ip4_u32(ipaddr) & ip_addr_get_ip4_u32(netmask));
305 
306 	/* Find the number of prefix bits of the netmask.  TODO: improve.. */
307 	val = ntohl(ip_addr_get_ip4_u32(netmask));
308 
309 	for (prefix = 0; prefix < IP4_BITS; prefix++)
310 		if (!(val & (1 << (IP4_BITS - prefix - 1))))
311 			break;
312 
313 	*prefixp = prefix;
314 	return TRUE;
315 }
316 
317 /*
318  * A local IPv4 address has been added to an interface.  The interface is given
319  * as 'ifdev', and the number of the just-added IPv4 address is given as 'num'.
320  * Generate a routing socket message and add local routes as appropriate.
321  */
322 static void
323 ifaddr_v4_added(struct ifdev * ifdev, ifaddr_v4_num_t num)
324 {
325 	const ip_addr_t *ipaddr;
326 	ip_addr_t netbase;
327 	unsigned int prefix;
328 
329 	assert(num == 0);
330 	assert(ifdev->ifdev_v4set);
331 
332 	/* Report the addition of the interface address. */
333 	rtsock_msg_addr_v4(ifdev, RTM_NEWADDR, num);
334 
335 	/*
336 	 * Add the local host route.  This will always succeed: for addition,
337 	 * we just checked with route_can_add(); when updating, we first remove
338 	 * the exact same route.  For now, we forbid users from messing with
339 	 * RTF_LOCAL routes directly, since nothing good (and a whole lot of
340 	 * bad) can come out of that, so the routes will not change under us.
341 	 *
342 	 * Why are we not using lo0 for this route, like the BSDs do?  Because
343 	 * that approach is not compatible with link-local addresses.  Instead,
344 	 * we intercept outgoing traffic to the local address, and redirect it
345 	 * over lo0, bypassing routing.  If we did not do this, we would never
346 	 * know the originally intended zone of the outgoing packet.  As an
347 	 * intended side effect, the traffic does show up on lo0 with BPF, just
348 	 * like on BSDs.  Similarly, we do not need to set a gateway here.
349 	 *
350 	 * We currently do not use the routing tables for lookups on local
351 	 * addresses - see ifaddr_v6_map() as to why.  If we ever do, that adds
352 	 * another reason that the interface associated with the route must be
353 	 * the interface that owns the address (and not, say, lo0).
354 	 */
355 	ipaddr = netif_ip_addr4(ifdev_get_netif(ifdev));
356 
357 	(void)route_add(ipaddr, IP4_BITS, NULL /*gateway*/, ifdev,
358 	    IFADDR_HOST_RTFLAGS, NULL /*rtr*/);
359 
360 	/*
361 	 * Add the local network route, if the rules say that we should.  Even
362 	 * then, adding the route may fail for various reasons, but this route
363 	 * is not essential and so we ignore failures here.
364 	 */
365 	if (ifaddr_v4_netroute(ifdev, num, &netbase, &prefix))
366 		(void)route_add(&netbase, prefix, NULL /*gateway*/, ifdev,
367 		    IFADDR_NET_RTFLAGS, NULL /*rtr*/);
368 }
369 
370 /*
371  * A particular local IPv4 address is being deleted.  See if there is another
372  * local IPv4 address assigned to another interface that should have the same
373  * local subnet route (but didn't, as such duplicate routes can obviously not
374  * be added), and if so, readd the route for that other address.
375  */
376 static void
377 ifaddr_v4_dupcheck(struct ifdev * oifdev, const ip_addr_t * onetbase,
378 	unsigned int oprefix)
379 {
380 	struct ifdev *ifdev;
381 	ip_addr_t netbase;
382 	unsigned int prefix;
383 
384 	for (ifdev = NULL; (ifdev = ifdev_enum(ifdev)) != NULL; ) {
385 		if (ifdev == oifdev || !ifdev->ifdev_v4set)
386 			continue;
387 
388 		if (ifaddr_v4_netroute(ifdev, (ifaddr_v4_num_t)0, &netbase,
389 		    &prefix) && prefix == oprefix &&
390 		    ip_addr_cmp(&netbase, onetbase)) {
391 			(void)route_add(&netbase, prefix, NULL /*gateway*/,
392 			    ifdev, IFADDR_NET_RTFLAGS, NULL /*rtr*/);
393 
394 			return;
395 		}
396 	}
397 }
398 
399 /*
400  * A local IPv4 address is about to be deleted from an interface, or the
401  * interface itself is about to be destroyed.  Generate a routing socket
402  * message about this and delete local routes as appropriate.  The interface is
403  * given as 'ifdev', and the number of the IPv4 address that is about to be
404  * deleted is given as 'num'.
405  */
406 static void
407 ifaddr_v4_deleted(struct ifdev * ifdev, ifaddr_v4_num_t num)
408 {
409 	struct route_entry *route;
410 	ip_addr_t netbase;
411 	unsigned int prefix;
412 
413 	assert(num == 0);
414 	assert(ifdev->ifdev_v4set);
415 
416 	/* Delete the local network route, if we tried adding it at all. */
417 	if (ifaddr_v4_netroute(ifdev, num, &netbase, &prefix) &&
418 	    (route = route_find(&netbase, prefix,
419 	    FALSE /*is_host*/)) != NULL &&
420 	    route_get_flags(route) == IFADDR_NET_RTFLAGS) {
421 		route_delete(route, NULL /*rtr*/);
422 
423 		/*
424 		 * Readd the local network route for another interface, if that
425 		 * interface has a local address on the very same network.
426 		 */
427 		ifaddr_v4_dupcheck(ifdev, &netbase, prefix);
428 	}
429 
430 	/* Delete the local host route. */
431 	if ((route = route_find(netif_ip_addr4(ifdev_get_netif(ifdev)),
432 	    IP4_BITS, TRUE /*is_host*/)) != NULL)
433 		route_delete(route, NULL /*rtr*/);
434 
435 	/* Report the deletion of the interface address. */
436 	rtsock_msg_addr_v4(ifdev, RTM_DELADDR, num);
437 }
438 
439 /*
440  * Add or update an IPv4 address on an interface.  The interface is given as
441  * 'ifdev'.  The address to add or update is pointed to by 'addr', which must
442  * always be a pointer to a valid address.  For DHCP clients it must be
443  * possible to add the 'any' address (0.0.0.0).  The network mask, broadcast
444  * address, and destination address parameters 'mask', 'bcast', and 'dest'
445  * (respectively) may be NULL pointers or pointers to AF_UNSPEC addresses, and
446  * will be disregarded if they are.  If 'mask' and/or 'bcast' are NULL when
447  * adding an address, default values will be computed for them.  The 'flags'
448  * field may contain NetBSD-style address flags (IN_IFF_).  Return OK if the
449  * address was successfully added or updated, or a negative error code if not.
450  */
451 int
452 ifaddr_v4_add(struct ifdev * ifdev, const struct sockaddr_in * addr,
453 	const struct sockaddr_in * mask, const struct sockaddr_in * bcast,
454 	const struct sockaddr_in * dest, int flags)
455 {
456 	ip_addr_t ipaddr, netmask, broad;
457 	ip4_addr_t ip4zero;
458 	struct netif *netif;
459 	unsigned int dummy;
460 	uint32_t val;
461 	int r;
462 
463 	assert(addr != NULL);
464 
465 	if ((r = addr_get_inet((const struct sockaddr *)addr, sizeof(*addr),
466 	    IPADDR_TYPE_V4, &ipaddr, TRUE /*kame*/, NULL /*port*/)) != OK)
467 		return r;
468 
469 	/* Forbid multicast (class D) and experimental (class E) addresses. */
470 	val = ntohl(ip_addr_get_ip4_u32(&ipaddr));
471 
472 	if (ip_addr_ismulticast(&ipaddr) || IP_EXPERIMENTAL(val))
473 		return EINVAL;
474 
475 	if (mask != NULL && mask->sin_family != AF_UNSPEC) {
476 		if ((r = addr_get_netmask((const struct sockaddr *)mask,
477 		    sizeof(*mask), IPADDR_TYPE_V4, &dummy, &netmask)) != OK)
478 			return r;
479 	} else {
480 		/*
481 		 * Generate a netmask based on IP class.  Old, obsolete stuff,
482 		 * but we can't have no netmask.
483 		 */
484 		if (IN_CLASSA(val))
485 			ip_addr_set_ip4_u32(&netmask, PP_HTONL(IN_CLASSA_NET));
486 		else if (IN_CLASSB(val))
487 			ip_addr_set_ip4_u32(&netmask, PP_HTONL(IN_CLASSB_NET));
488 		else if (IN_CLASSC(val))
489 			ip_addr_set_ip4_u32(&netmask, PP_HTONL(IN_CLASSC_NET));
490 		else /* should not trigger */
491 			ip_addr_set_ip4_u32(&netmask, PP_HTONL(IN_CLASSD_NET));
492 	}
493 
494 	if (bcast != NULL && bcast->sin_family != AF_UNSPEC) {
495 		if ((r = addr_get_inet((const struct sockaddr *)bcast,
496 		    sizeof(*bcast), IPADDR_TYPE_V4, &broad, TRUE /*kame*/,
497 		    NULL /*port*/)) != OK)
498 			return r;
499 
500 		/*
501 		 * lwIP does not allow setting the broadcast address, so we
502 		 * must ensure that the given address is what lwIP uses anyway.
503 		 * No need to perform byte order swaps here.
504 		 */
505 		if (ip_addr_get_ip4_u32(&broad) !=
506 		    (ip_addr_get_ip4_u32(&ipaddr) |
507 		    ~ip_addr_get_ip4_u32(&netmask)))
508 			return EINVAL;
509 	}
510 
511 	/* TODO: dest (note: may be NULL) */
512 
513 	/*
514 	 * We currently do not support any IPv4 address flags.  Even though
515 	 * supporting them would make maintaining dhcpcd(8) easier, lwIP does
516 	 * not offers the means to implement them properly.
517 	 */
518 	if (flags != 0)
519 		return EINVAL;
520 
521 	netif = ifdev_get_netif(ifdev);
522 
523 	/* Should we add a new address, or update an existing one? */
524 	if (!ifdev->ifdev_v4set ||
525 	    !ip_addr_cmp(netif_ip_addr4(netif), &ipaddr)) {
526 		/*
527 		 * Add a new address.  lwIP supports only one IPv4 address per
528 		 * netif.
529 		 */
530 		if (ifdev->ifdev_v4set)
531 			return ENOBUFS; /* TODO: a better error code */
532 
533 		/*
534 		 * It must be possible to add the address to the routing table,
535 		 * so make sure that we can add such a route later on.  The
536 		 * error code should be accurate for most real-world cases.
537 		 */
538 		if (!route_can_add(&ipaddr, IP4_BITS, TRUE /*is_host*/))
539 			return EEXIST;
540 
541 		ip4_addr_set_zero(&ip4zero);
542 
543 		netif_set_addr(netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask),
544 		    &ip4zero);
545 
546 		ifdev->ifdev_v4set = TRUE;
547 	} else {
548 		/*
549 		 * Update an existing address.  First report the address as
550 		 * deleted.  Do not actually delete the address in netif,
551 		 * because that would cause problems with its changing IP
552 		 * addresses on existing sockets.
553 		 */
554 		ifaddr_v4_deleted(ifdev, (ifaddr_v4_num_t)0);
555 
556 		/* Update the one part that may have actually changed. */
557 		netif_set_netmask(netif, ip_2_ip4(&netmask));
558 	}
559 
560 	/* In both cases, we now need to report the address as added. */
561 	ifaddr_v4_added(ifdev, (ifaddr_v4_num_t)0);
562 
563 	return OK;
564 }
565 
566 /*
567  * Delete an IPv4 address from an interface.  The given address number 'num'
568  * must have been obtained from ifaddr_v4_find() or ifaddr_v4_enum() on the
569  * same interface just before.  This function always succeeds.
570  */
571 void
572 ifaddr_v4_del(struct ifdev * ifdev, ifaddr_v4_num_t num)
573 {
574 	ip4_addr_t ip4zero;
575 
576 	assert(num == 0);
577 	assert(ifdev->ifdev_v4set);
578 
579 	/*
580 	 * Report the address as deleted.  Always do this first, because the
581 	 * reporting requires that the address is still there.
582 	 */
583 	ifaddr_v4_deleted(ifdev, num);
584 
585 	/* Then actually delete the address. */
586 	ip4_addr_set_zero(&ip4zero);
587 
588 	netif_set_addr(ifdev_get_netif(ifdev), &ip4zero, &ip4zero, &ip4zero);
589 
590 	ifdev->ifdev_v4set = FALSE;
591 }
592 
593 /*
594  * Announce all IPv4 addresses associated with the given interface as deleted,
595  * Used (only) right before the interface is destroyed.
596  */
597 void
598 ifaddr_v4_clear(struct ifdev * ifdev)
599 {
600 
601 	if (ifdev->ifdev_v4set)
602 		ifaddr_v4_deleted(ifdev, (ifaddr_v4_num_t)0);
603 }
604 
605 /*
606  * Return the first interface device that owns the given IPv4 address, or NULL
607  * if it is not a valid local IPv4 address.
608  */
609 struct ifdev *
610 ifaddr_v4_map_by_addr(const ip4_addr_t * ip4addr)
611 {
612 	struct ifdev *ifdev;
613 
614 	/*
615 	 * It would be nice to be able to do a route lookup on an RTF_LOCAL
616 	 * entry here, but we do not do this for IPv6 either - see the comment
617 	 * in ifaddr_v6_map() - and it is much less needed here, because each
618 	 * interface has at most one IPv4 address.
619 	 */
620 	for (ifdev = NULL; (ifdev = ifdev_enum(ifdev)) != NULL; ) {
621 		if (ifdev->ifdev_v4set &&
622 		    ip4_addr_cmp(netif_ip4_addr(ifdev_get_netif(ifdev)),
623 		    ip4addr))
624 			return ifdev;
625 	}
626 
627 	return NULL;
628 }
629 
630 /*
631  * Return the first interface device for which the given IPv4 address is on a
632  * configured local subnet, or NULL if no match was found.
633  */
634 static struct ifdev *
635 ifaddr_v4_map_by_subnet(const ip4_addr_t * ip4addr)
636 {
637 	struct ifdev *ifdev;
638 	struct netif *netif;
639 	uint32_t addr1, addr2, mask;
640 
641 	addr1 = ip4_addr_get_u32(ip4addr);
642 
643 	/*
644 	 * Here, we must never do a route lookup, because this routine is used
645 	 * for SO_DONTROUTE/MSG_DONTROUTE.
646 	 */
647 	for (ifdev = NULL; (ifdev = ifdev_enum(ifdev)) != NULL; ) {
648 		if (!ifdev->ifdev_v4set)
649 			continue;
650 
651 		netif = ifdev_get_netif(ifdev);
652 
653 		addr2 = ip4_addr_get_u32(netif_ip4_addr(netif));
654 		mask = ip4_addr_get_u32(netif_ip4_netmask(netif));
655 
656 		if ((addr1 & mask) == (addr2 & mask))
657 			return ifdev;
658 	}
659 
660 	return NULL;
661 }
662 
663 /*
664  * Return TRUE if the given local IPv6 interface address is valid (= preferred
665  * or deprecated), or FALSE if it is not (= tentative or duplicated).  The
666  * address slot must be in use, that is, it must not be free (= invalid).
667  */
668 static int
669 ifaddr_v6_isvalid(struct ifdev * ifdev, ifaddr_v6_num_t num)
670 {
671 	int state;
672 
673 	state = ifdev->ifdev_v6state[num];
674 
675 	/* Note that 'valid' and 'invalid' are not each other's inverse! */
676 	assert(!ip6_addr_isinvalid(state));
677 
678 	return ip6_addr_isvalid(state);
679 }
680 
681 /*
682  * Find an IPv6 address assigned to the given interface that matches the given
683  * IPv6 address.  Return TRUE if a match was found, with its number stored in
684  * 'nump'.  Return FALSE if the address is not assigned to the interface.
685  */
686 static int
687 ifaddr_v6_match(struct ifdev * ifdev, const ip_addr_t * ipaddr,
688 	ifaddr_v6_num_t * nump)
689 {
690 	int8_t i;
691 
692 	assert(IP_IS_V6(ipaddr));
693 
694 	i = netif_get_ip6_addr_match(ifdev_get_netif(ifdev), ip_2_ip6(ipaddr));
695 	if (i < 0)
696 		return FALSE;
697 
698 	*nump = i;
699 	return TRUE;
700 }
701 
702 /*
703  * Find an IPv6 address locally assigned to a interface.  The IPv6 address is
704  * given as 'addr6', and must use KAME-style embedding for zones.  The
705  * interface is given as 'ifdev'.  On success, return OK, with the IPv6 address
706  * number stored in 'num'.  On failure, return a negative error code.  This
707  * function also returns tentative and duplicated addresses.
708  */
709 int
710 ifaddr_v6_find(struct ifdev * ifdev, const struct sockaddr_in6 * addr6,
711 	ifaddr_v6_num_t * nump)
712 {
713 	ip_addr_t ipaddr;
714 	int r;
715 
716 	if ((r = addr_get_inet((const struct sockaddr *)addr6, sizeof(*addr6),
717 	    IPADDR_TYPE_V6, &ipaddr, TRUE /*kame*/, NULL /*port*/)) != OK)
718 		return r;
719 
720 	if (ip6_addr_has_zone(ip_2_ip6(&ipaddr)) &&
721 	    ip6_addr_zone(ip_2_ip6(&ipaddr)) != ifdev_get_index(ifdev))
722 		return EADDRNOTAVAIL;
723 
724 	if (!ifaddr_v6_match(ifdev, &ipaddr, nump))
725 		return EADDRNOTAVAIL;
726 
727 	return OK;
728 }
729 
730 /*
731  * Enumerate IPv6 addresses locally assigned to the given interface 'ifdev'.
732  * The caller should set 'nump' to 0 initially, and increase it by one between
733  * a successful call and the next enumeration call.  Return TRUE on success,
734  * meaning that starting from the given value of 'nump' there is at least one
735  * IPv6 address, of which the number is stored in 'nump' on return.  Return
736  * FALSE if there are no more IPv6 addresses locally assigned to the interface.
737  * This function also returns tentative and duplicated address entries.
738  */
739 int
740 ifaddr_v6_enum(struct ifdev * ifdev, ifaddr_v6_num_t * nump)
741 {
742 	ifaddr_v6_num_t num;
743 
744 	for (num = *nump; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
745 		if (!ip6_addr_isinvalid(ifdev->ifdev_v6state[num])) {
746 			*nump = num;
747 			return TRUE;
748 		}
749 	}
750 
751 	return FALSE;
752 }
753 
754 /*
755  * Obtain information about the IPv6 address 'num' assigned to the interface
756  * 'ifdev'.  Store the IPv6 address in 'addr6', the network mask in 'mask6',
757  * and the destination address in 'dest6'.  Each of these pointers may be NULL.
758  * The returned addresses use KAME-style embedding for zones.  This function
759  * also returns tentative and duplicated addresses.  It always succeeds.
760  */
761 void
762 ifaddr_v6_get(struct ifdev * ifdev, ifaddr_v6_num_t num,
763 	struct sockaddr_in6 * addr6, struct sockaddr_in6 * mask6,
764 	struct sockaddr_in6 * dest6)
765 {
766 	struct netif *netif;
767 	socklen_t addr_len;
768 
769 	/*
770 	 * Due to route message generation upon address addition and deletion,
771 	 * either the ifdev_v6state or the netif state may not yet have been
772 	 * updated here.
773 	 */
774 	assert(!ip6_addr_isinvalid(ifdev->ifdev_v6state[num]) ||
775 	    !ip6_addr_isinvalid(netif_ip6_addr_state(ifdev_get_netif(ifdev),
776 	    (int)num)));
777 
778 	netif = ifdev_get_netif(ifdev);
779 
780 	if (addr6 != NULL) {
781 		addr_len = sizeof(*addr6);
782 
783 		(void)addr_put_inet((struct sockaddr *)addr6, &addr_len,
784 		    netif_ip_addr6(netif, (int)num), TRUE /*kame*/,
785 		    0 /*port*/);
786 	}
787 
788 	if (mask6 != NULL) {
789 		addr_len = sizeof(*mask6);
790 
791 		addr_put_netmask((struct sockaddr *)mask6, &addr_len,
792 		    IPADDR_TYPE_V6, ifdev->ifdev_v6prefix[num]);
793 	}
794 
795 	if (dest6 != NULL) {
796 		/* TODO: dest6 */
797 		dest6->sin6_len = 0;
798 		dest6->sin6_family = AF_UNSPEC;
799 	}
800 }
801 
802 /*
803  * Obtain NetBSD-style state flags (IN6_IFF_) for the given local IPv6 address.
804  * The given number must identify an existing address.  Return the flags.
805  */
806 int
807 ifaddr_v6_get_flags(struct ifdev * ifdev, ifaddr_v6_num_t num)
808 {
809 	int state, flags;
810 
811 	state = ifdev->ifdev_v6state[num];
812 
813 	assert(!ip6_addr_isinvalid(state));
814 
815 	flags = 0;
816 	if (ip6_addr_isduplicated(state))
817 		flags |= IN6_IFF_DUPLICATED;
818 	if (ip6_addr_istentative(state))
819 		flags |= IN6_IFF_TENTATIVE;
820 	if (ip6_addr_isdeprecated(state))
821 		flags |= IN6_IFF_DEPRECATED;
822 	if (ifdev->ifdev_v6flags[num] & IFADDR_V6F_AUTOCONF)
823 		flags |= IN6_IFF_AUTOCONF;
824 	if (ifdev->ifdev_v6flags[num] & IFADDR_V6F_TEMPORARY)
825 		flags |= IN6_IFF_TEMPORARY;
826 
827 	return flags;
828 }
829 
830 /*
831  * Obtain lifetime information about the given local IPv6 address.  The given
832  * 'lifetime' structure is filled as a result.  This function always succeeds.
833  */
834 void
835 ifaddr_v6_get_lifetime(struct ifdev * ifdev, ifaddr_v6_num_t num,
836 	struct in6_addrlifetime * lifetime)
837 {
838 	struct netif *netif;
839 	uint32_t valid_life, pref_life;
840 	time_t now;
841 
842 	assert(!ip6_addr_isinvalid(ifdev->ifdev_v6state[num]));
843 
844 	netif = ifdev_get_netif(ifdev);
845 
846 	valid_life = netif_ip6_addr_valid_life(netif, (int)num);
847 	pref_life = netif_ip6_addr_pref_life(netif, (int)num);
848 
849 	/*
850 	 * Represent 'static' as 'infinite' to userland.  This applies only to
851 	 * link-local addresses, which do not have lifetimes at all.
852 	 */
853 	if (ip6_addr_life_isstatic(valid_life)) {
854 		valid_life = IP6_ADDR_LIFE_INFINITE;
855 		pref_life = IP6_ADDR_LIFE_INFINITE;
856 	}
857 
858 	now = clock_time(NULL);
859 
860 	/*
861 	 * TODO: the _vltime and _pltime values filled in here are not correct.
862 	 * They should be set to the originally assigned values rather than the
863 	 * current ones.  Getting this right would mean we'd have to save the
864 	 * original values.  So far it does not look like userland needs that..
865 	 */
866 	memset(lifetime, 0, sizeof(*lifetime));
867 	lifetime->ia6t_vltime = valid_life;
868 	lifetime->ia6t_pltime = pref_life;
869 	if (!ip6_addr_life_isinfinite(valid_life))
870 		lifetime->ia6t_expire = now + valid_life;
871 	if (!ip6_addr_life_isinfinite(pref_life))
872 		lifetime->ia6t_preferred = now + pref_life;
873 }
874 
875 /*
876  * Determine whether there should be a local subnet route for the given
877  * assigned IPv6 address, and if so, compute the subnet mask to add.  Return
878  * TRUE if a local subnet route should be added, and return the network base
879  * address in 'netbase' and the number of prefix bits in 'prefixp'.  Return
880  * FALSE if no subnet route should be added for the assigned address.
881  */
882 static unsigned int
883 ifaddr_v6_netroute(struct ifdev * ifdev, ifaddr_v6_num_t num,
884 	ip_addr_t * netbase, unsigned int * prefixp)
885 {
886 	const ip_addr_t *ipaddr;
887 
888 	ipaddr = netif_ip_addr6(ifdev_get_netif(ifdev), (int)num);
889 
890 	/*
891 	 * A local network route should be added only if all of the following
892 	 * conditions are met:
893 	 *
894 	 * 1) The address is not auto-configured.  Autoconfigured addresses do
895 	 *    not have an implied subnet, as explained in RFC 5942.
896 	 *    Consistency with respect to subnet routes is why we do not allow
897 	 *    changing the AUTOCONF flag after an address has been added.
898 	 * 2) The subnet assignment is not a /128 prefix.  Not only would such
899 	 *    a route not be useful, adding it would fail anyway because we
900 	 *    currently do not support adding a host-type route and a
901 	 *    full-width net-type route for the same IP address.
902 	 * 3) If the interface is a loopback device, the address is not a link-
903 	 *    local address.  This appears to be what NetBSD does, but
904 	 *    additional loopback-related exceptions may be needed here.
905 	 */
906 	if ((ifdev->ifdev_v6flags[num] & IFADDR_V6F_AUTOCONF) ||
907 	    ifdev->ifdev_v6prefix[num] == IP6_BITS ||
908 	    (ifdev_is_loopback(ifdev) &&
909 	    ip6_addr_islinklocal(ip_2_ip6(ipaddr))))
910 		return FALSE;
911 
912 	addr_normalize(netbase, ipaddr, ifdev->ifdev_v6prefix[num]);
913 
914 	*prefixp = ifdev->ifdev_v6prefix[num];
915 	return TRUE;
916 }
917 
918 /*
919  * A local IPv6 has become valid (preferred or deprecated) after previously
920  * being invalid (tentative, duplicated, or free).  Report the addition of the
921  * now-usable address, and add appropriate routes to the IPv6 routing table.
922  *
923  * This function is *not* called immediately when an address is added, but
924  * rather when the address becomes valid (meaning it is no longer tentative,
925  * and thus supposedly collision-free).  For that reason, unlike for IPv4, this
926  * function is only ever called indirectly, through the netif status callback.
927  */
928 static void
929 ifaddr_v6_added(struct ifdev * ifdev, ifaddr_v6_num_t num)
930 {
931 	const ip_addr_t *ipaddr;
932 	ip_addr_t base;
933 	ip6_addr_t *base6;
934 	unsigned int prefix;
935 
936 	/* Check the netif as ifdev_v6state is not yet updated here. */
937 	assert(!ip6_addr_isinvalid(netif_ip6_addr_state(ifdev_get_netif(ifdev),
938 	    (int)num)));
939 
940 	/* Report the addition of the interface address. */
941 	rtsock_msg_addr_v6(ifdev, RTM_NEWADDR, num);
942 
943 	/*
944 	 * Add the local host route.  This will always succeed.  See the IPv4
945 	 * version of this code for more information.
946 	 */
947 	ipaddr = netif_ip_addr6(ifdev_get_netif(ifdev), (int)num);
948 
949 	(void)route_add(ipaddr, IP6_BITS, NULL /*gateway*/, ifdev,
950 	    IFADDR_HOST_RTFLAGS, NULL /*rtr*/);
951 
952 	/*
953 	 * Add the local network route, if the rules say that we should.  Even
954 	 * then, adding the route may fail for various reasons, but this route
955 	 * is not essential and so we ignore failures here.
956 	 */
957 	if (ifaddr_v6_netroute(ifdev, num, &base, &prefix))
958 		(void)route_add(&base, prefix, NULL /*gateway*/, ifdev,
959 		    IFADDR_NET_RTFLAGS, NULL /*rtr*/);
960 
961 	/*
962 	 * Add the node-local and link-local scope multicast routes.  These are
963 	 * interface-specific rather than address-specific.  They are (re)added
964 	 * for every address, and never deleted until interface destruction.
965 	 */
966 	ip_addr_set_zero_ip6(&base);
967 	base6 = ip_2_ip6(&base);
968 
969 	base6->addr[0] = htonl(0xff010000UL | ifdev_get_index(ifdev));
970 
971 	(void)route_add(&base, 32, NULL /*gateway*/, ifdev, IFADDR_NET_RTFLAGS,
972 	    NULL /*rtr*/);
973 
974 	base6->addr[0] = htonl(0xff020000UL | ifdev_get_index(ifdev));
975 
976 	(void)route_add(&base, 32, NULL /*gateway*/, ifdev, IFADDR_NET_RTFLAGS,
977 	    NULL /*rtr*/);
978 }
979 
980 /*
981  * A particular local IPv6 address is being deleted.  See if there is another
982  * local IPv6 address assigned that should have the same local subnet route
983  * (but didn't, as such duplicate routes can obviously not be added), and if
984  * so, readd the route for that other address, possibly for the same interface.
985  */
986 static void
987 ifaddr_v6_dupcheck(struct ifdev * oifdev, const ip_addr_t * onetbase,
988 	unsigned int oprefix)
989 {
990 	struct ifdev *ifdev;
991 	ip_addr_t netbase;
992 	unsigned int prefix;
993 	ifaddr_v6_num_t num;
994 
995 	for (ifdev = NULL; (ifdev = ifdev_enum(ifdev)) != NULL; ) {
996 		if (ifdev == oifdev)
997 			continue;
998 
999 		for (num = 0; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
1000 			if (ip6_addr_isinvalid(ifdev->ifdev_v6state[num]) ||
1001 			    !ifaddr_v6_isvalid(ifdev, num))
1002 				continue;
1003 
1004 			if (!ifaddr_v6_netroute(ifdev, num, &netbase, &prefix))
1005 				continue;
1006 
1007 			if (prefix != oprefix ||
1008 			    !ip_addr_cmp(&netbase, onetbase))
1009 				continue;
1010 
1011 			(void)route_add(&netbase, prefix, NULL /*gateway*/,
1012 			    ifdev, IFADDR_NET_RTFLAGS, NULL /*rtr*/);
1013 
1014 			return;
1015 		}
1016 	}
1017 }
1018 
1019 /*
1020  * A local IPv6 has become invalid (tentative, duplicated, or free) after
1021  * previously being valid (preferred or deprecated).  Report the deletion of
1022  * the previously-usable address, and remove previously added routes from the
1023  * IPv6 routing table.
1024  *
1025  * This function is not always called for every deleted address: instead, it is
1026  * called only when the address was previously valid, meaning that
1027  * ifaddr_v6_added() was invoked on it before as well.  Unlike for IPv4, this
1028  * function is typically called indirectly, through the netif status callback.
1029  */
1030 static void
1031 ifaddr_v6_deleted(struct ifdev * ifdev, ifaddr_v6_num_t num)
1032 {
1033 	struct route_entry *route;
1034 	const ip_addr_t *ipaddr;
1035 	ip_addr_t netbase;
1036 	unsigned int prefix;
1037 
1038 	assert(!ip6_addr_isinvalid(ifdev->ifdev_v6state[num]));
1039 
1040 	ipaddr = netif_ip_addr6(ifdev_get_netif(ifdev), (int)num);
1041 
1042 	/* Delete the local network route, if we tried adding it at all. */
1043 	if (ifaddr_v6_netroute(ifdev, num, &netbase, &prefix) &&
1044 	    (route = route_find(&netbase, prefix,
1045 	    FALSE /*is_host*/)) != NULL &&
1046 	    route_get_flags(route) == IFADDR_NET_RTFLAGS) {
1047 		route_delete(route, NULL /*rtr*/);
1048 
1049 		/*
1050 		 * Readd the local network route for another interface, if that
1051 		 * interface has a local address on the very same network.
1052 		 * Skip scoped (e.g., link-local) addresses, for which the
1053 		 * routes are unique anyway.
1054 		 */
1055 		if (!ip6_addr_has_scope(ip_2_ip6(ipaddr), IP6_UNICAST))
1056 			ifaddr_v6_dupcheck(ifdev, &netbase, prefix);
1057 	}
1058 
1059 	/* Delete the local host route. */
1060 	if ((route = route_find(ipaddr, IP6_BITS, TRUE /*is_host*/)) != NULL)
1061 		route_delete(route, NULL /*rtr*/);
1062 
1063 	/* Report the deletion of the interface address. */
1064 	rtsock_msg_addr_v6(ifdev, RTM_DELADDR, num);
1065 }
1066 
1067 /*
1068  * Add or update an IPv6 address on an interface.  The interface is given as
1069  * 'ifdev'.  The IPv6 address to add or update is pointed to by 'addr6', which
1070  * must always be a pointer to a valid address.  The network mask is given as
1071  * 'mask6', but may be NULL when updating an address.  The same applies to the
1072  * destination address 'dest6'.  The given IPv6 address and destination address
1073  * must use KAME-style embedding for zones.  The flags field 'flags' contains
1074  * a set of NetBSD-style address flags (IN6_IFF_).  The 'lifetime' parameter
1075  * always points to lifetime information to be set or updated.  Return OK if
1076  * the address was successfully added or updated, or a negative error code
1077  * otherwise.
1078  */
1079 int
1080 ifaddr_v6_add(struct ifdev * ifdev, const struct sockaddr_in6 * addr6,
1081 	const struct sockaddr_in6 * mask6, const struct sockaddr_in6 * dest6,
1082 	int flags, const struct in6_addrlifetime * lifetime)
1083 {
1084 	ip_addr_t ipaddr;
1085 	ip6_addr_t *ip6addr;
1086 	struct netif *netif;
1087 	unsigned int prefix;
1088 	ifaddr_v6_num_t num;
1089 	uint32_t valid_life;
1090 	int r, state;
1091 
1092 	netif = ifdev_get_netif(ifdev);
1093 
1094 	/*
1095 	 * Somewhat curiously, NetBSD ignores the zone ID for these requests,
1096 	 * rather than rejecting requests with a zone ID that does not match
1097 	 * the associated interface's.  We have no reason to be stricter, and
1098 	 * so we overwrite whatever zone was given..
1099 	 */
1100 	if ((r = addr_get_inet((const struct sockaddr *)addr6, sizeof(*addr6),
1101 	    IPADDR_TYPE_V6, &ipaddr, TRUE /*kame*/, NULL /*port*/)) != OK)
1102 		return r;
1103 
1104 	/*
1105 	 * Forbid locally-assigned multicast addresses.  Not only are those
1106 	 * absolutely disallowed in theory, we also assume all locally assigned
1107 	 * addresses are unicast in various places in practice.
1108 	 */
1109 	if (ip_addr_ismulticast(&ipaddr))
1110 		return EINVAL;
1111 
1112 	ip6_addr_assign_zone(ip_2_ip6(&ipaddr), IP6_UNICAST, netif);
1113 
1114 	/*
1115 	 * The netmask needs to be there only when adding a new address, but if
1116 	 * a netmask is given, it must be valid.  Note that lwIP itself
1117 	 * supports only /64 subnets; however, due to our custom routing hooks,
1118 	 * combined with giving lifetimes to all addresses (except the primary
1119 	 * link-local address, which is a /64), we control all routing
1120 	 * decisions that would otherwise be affected by that lwIP limitation.
1121 	 */
1122 	if (mask6 != NULL && mask6->sin6_family != AF_UNSPEC) {
1123 		if ((r = addr_get_netmask((const struct sockaddr *)mask6,
1124 		    sizeof(*mask6), IPADDR_TYPE_V6, &prefix,
1125 		    NULL /*ipaddr*/)) != OK)
1126 			return r;
1127 	} else
1128 		prefix = 0;
1129 
1130 	/* TODO: dest6 (note: may be NULL) */
1131 
1132 	/* TODO: support for IN6_IFF_ANYCAST and IN6_IFF_DETACHED. */
1133 	if (flags & ~(IN6_IFF_TENTATIVE | IN6_IFF_DEPRECATED | IN6_IFF_NODAD |
1134 	    IN6_IFF_AUTOCONF | IN6_IFF_TEMPORARY))
1135 		return EINVAL;
1136 
1137 	/* Should we add a new address, or update an existing one? */
1138 	ip6addr = ip_2_ip6(&ipaddr);
1139 
1140 	if (!ifaddr_v6_match(ifdev, &ipaddr, &num)) {
1141 		/* Add a new address. */
1142 		if (prefix == 0)
1143 			return EINVAL;
1144 
1145 		/*
1146 		 * It must be possible to add the address to the routing table,
1147 		 * so make sure that we can add such a route later on.  The
1148 		 * error code should be accurate for most real-world cases.
1149 		 */
1150 		if (!route_can_add(&ipaddr, IP6_BITS, TRUE /*is_host*/))
1151 			return EEXIST;
1152 
1153 		/*
1154 		 * As an exception, if the given address is a link-local
1155 		 * address and there is no link-local address in slot 0, use
1156 		 * slot 0 to store this address.  This requires a /64 prefix
1157 		 * length, because lwIP will use an implied /64 subnet for it.
1158 		 */
1159 		if (ip6_addr_isinvalid(ifdev->ifdev_v6state[0]) &&
1160 		    ip6_addr_islinklocal(ip6addr) && prefix == 64) {
1161 			num = (ifaddr_v6_num_t)0;
1162 
1163 			/*
1164 			 * Such link-local addresses are not considered to be
1165 			 * autoconfigured, because they always have an implied
1166 			 * subnet.  Therefore, clear that flag.
1167 			 */
1168 			flags &= ~IN6_IFF_AUTOCONF;
1169 		} else {
1170 			/*
1171 			 * Find a free slot.  We bypass netif_ip6_addr_add() as
1172 			 * it makes things more, rather than less, complicated
1173 			 * for us here.
1174 			 */
1175 			for (num = 1; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
1176 				state = ifdev->ifdev_v6state[num];
1177 
1178 				if (ip6_addr_isinvalid(state))
1179 					break;
1180 			}
1181 
1182 			if (num == LWIP_IPV6_NUM_ADDRESSES)
1183 				return ENOBUFS;	/* TODO: a better error code */
1184 		}
1185 
1186 		assert(ip6_addr_isinvalid(netif_ip6_addr_state(netif, num)));
1187 
1188 		/*
1189 		 * We bypass the standard netif IPv6 address assignment
1190 		 * functions here, because we may want to change the state of
1191 		 * the address to something particular (rather than always
1192 		 * tentative) and set the state only when we're otherwise done.
1193 		 */
1194 		netif->ip6_addr[num] = ipaddr;
1195 
1196 		ifdev->ifdev_v6prefix[num] = prefix;
1197 
1198 		/*
1199 		 * New addresses are always DAD-tested for collisions first,
1200 		 * except on loopback interfaces, which will simply get back
1201 		 * its own DAD request and conclude there is a collision..
1202 		 */
1203 		if (flags & IN6_IFF_TENTATIVE)
1204 			state = IP6_ADDR_TENTATIVE;
1205 		else if (flags & IN6_IFF_DEPRECATED)
1206 			state = IP6_ADDR_VALID;
1207 		else if (ifdev_is_loopback(ifdev) || (flags & IN6_IFF_NODAD))
1208 			state = IP6_ADDR_PREFERRED;
1209 		else
1210 			state = IP6_ADDR_TENTATIVE;
1211 
1212 		ifdev->ifdev_v6flags[num] = 0;
1213 		if (flags & IN6_IFF_AUTOCONF)
1214 			ifdev->ifdev_v6flags[num] |= IFADDR_V6F_AUTOCONF;
1215 		if (flags & IN6_IFF_TEMPORARY)
1216 			ifdev->ifdev_v6flags[num] |= IFADDR_V6F_TEMPORARY;
1217 
1218 		/* Precompute the address scope as well. */
1219 		ifdev->ifdev_v6scope[num] =
1220 		    addrpol_get_scope(&ipaddr, TRUE /*is_src*/);
1221 	} else {
1222 		/* Update an existing address. */
1223 		/*
1224 		 * Since no fundamental aspects about the address may change
1225 		 * we also do not need to delete and readd the address here.
1226 		 */
1227 		if (prefix != 0 && prefix != ifdev->ifdev_v6prefix[num])
1228 			return EINVAL;
1229 
1230 		/* TODO: figure out exactly what userland wants here.. */
1231 		if (flags & IN6_IFF_TENTATIVE)
1232 			state = IP6_ADDR_TENTATIVE;
1233 		else if (flags & IN6_IFF_DEPRECATED)
1234 			state = IP6_ADDR_VALID;
1235 		else
1236 			state = IP6_ADDR_PREFERRED;
1237 
1238 		/*
1239 		 * Leave the AUTOCONF flag as is, because otherwise we might
1240 		 * also have to add or delete a subnet route here.
1241 		 */
1242 		if (flags & IN6_IFF_TEMPORARY)
1243 			ifdev->ifdev_v6flags[num] |= IFADDR_V6F_TEMPORARY;
1244 		else
1245 			ifdev->ifdev_v6flags[num] &= ~IFADDR_V6F_TEMPORARY;
1246 	}
1247 
1248 	/*
1249 	 * In our implementation, all addresses except the first link-local
1250 	 * address (which is always stored in slot 0) have a lifetime and are
1251 	 * thus not static as far as lwIP is concerned.  The result is that all
1252 	 * those addresses are considered to be /128 assignments, leaving the
1253 	 * routing decisions entirely to us, which is exactly what we want.  As
1254 	 * such we have to be careful not to assign a valid lifetime of 0
1255 	 * ("static").  For preferred lifetimes, 0 is not a special value,
1256 	 * though.  Either value may be 0xffffffff, which denotes "infinite".
1257 	 *
1258 	 * As for those routing decisions: we use the AUTOCONF flag as the
1259 	 * indication whether or not to add a subnet (= on-link prefix) route
1260 	 * for the address.  See also ifaddr_v6_added().
1261 	 */
1262 	if (num != 0) {
1263 		valid_life = lifetime->ia6t_vltime;
1264 		if (ip6_addr_life_isstatic(valid_life))
1265 			valid_life++;
1266 		netif_ip6_addr_set_valid_life(netif, (int)num, valid_life);
1267 		netif_ip6_addr_set_pref_life(netif, (int)num,
1268 		    lifetime->ia6t_pltime);
1269 	}
1270 
1271 	/*
1272 	 * The lifetime of address slot 0 is initialized to, and remains at all
1273 	 * times, zero ("static").  All other slots have an actual lifetime.
1274 	 */
1275 	assert(netif_ip6_addr_isstatic(netif, (int)num) == !num);
1276 
1277 	/*
1278 	 * Change the address state last, as this may immediately trigger
1279 	 * reports and route addition etc, although usually it will not:
1280 	 * addresses are typically added as tentative, and ifaddr_v6_added()
1281 	 * will be called only once the address is valid.
1282 	 */
1283 	netif_ip6_addr_set_state(netif, (int)num, state);
1284 
1285 	return OK;
1286 }
1287 
1288 /*
1289  * Delete an IPv6 address from an interface.  The given address number must
1290  * have been obtained through ifaddr_v6_find() or ifaddr_v6_enum().
1291  * This function always succeeds.
1292  */
1293 void
1294 ifaddr_v6_del(struct ifdev * ifdev, ifaddr_v6_num_t num)
1295 {
1296 
1297 	assert(num <= LWIP_IPV6_NUM_ADDRESSES);
1298 	assert(!ip6_addr_isinvalid(ifdev->ifdev_v6state[num]));
1299 
1300 	/* The state change will also trigger ifaddr_v6_deleted() if needed. */
1301 	netif_ip6_addr_set_state(ifdev_get_netif(ifdev), (int)num,
1302 	    IP6_ADDR_INVALID);
1303 }
1304 
1305 /*
1306  * Announce all IPv6 addresses associated with the given interface as deleted.
1307  * Used (only) right before the interface is destroyed.
1308  */
1309 void
1310 ifaddr_v6_clear(struct ifdev * ifdev)
1311 {
1312 	ifaddr_v6_num_t num;
1313 
1314 	for (num = 0; ifaddr_v6_enum(ifdev, &num); num++) {
1315 		if (ifaddr_v6_isvalid(ifdev, num))
1316 			ifaddr_v6_deleted(ifdev, num);
1317 	}
1318 }
1319 
1320 /*
1321  * Check state changes on local IPv6 addresses and update shadow state
1322  * accordingly.
1323  */
1324 void
1325 ifaddr_v6_check(struct ifdev * ifdev)
1326 {
1327 	struct netif *netif;
1328 	ifaddr_v6_num_t num;
1329 	int old_state, new_state, was_valid, is_valid;
1330 
1331 	netif = ifdev_get_netif(ifdev);
1332 
1333 	for (num = 0; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
1334 		/*
1335 		 * Since we compile lwIP without support for stateless
1336 		 * autoconfiguration, there will be no cases where new
1337 		 * addresses appear out of nowhere.  As such, we can rely on
1338 		 * all necessary fields already being initialized here.
1339 		 */
1340 		old_state = ifdev->ifdev_v6state[num];
1341 		new_state = netif_ip6_addr_state(netif, num);
1342 
1343 		if (old_state == new_state)
1344 			continue;
1345 
1346 		was_valid = ip6_addr_isvalid(old_state);
1347 		is_valid = ip6_addr_isvalid(new_state);
1348 
1349 		if (was_valid != is_valid) {
1350 			if (is_valid)
1351 				ifaddr_v6_added(ifdev, num);
1352 			else
1353 				ifaddr_v6_deleted(ifdev, num);
1354 		}
1355 
1356 		ifdev->ifdev_v6state[num] = new_state;
1357 
1358 		/*
1359 		 * TODO: implement the requirements for dealing with duplicated
1360 		 * addresses, in particular the link-local address, as
1361 		 * specified by RFC 4862 Sec. 5.4.5.  NetBSD uses the
1362 		 * ND6_IFF_IFDISABLED flag for this, essentially disabling
1363 		 * the interface completely when that flag is set.
1364 		 */
1365 	}
1366 }
1367 
1368 /*
1369  * A change in the interface and/or link status has resulted in both now being
1370  * up.  Set the link-local address, if any, to tentative state.  Exempt
1371  * loopback interfaces, which would just see their own requests as collisions.
1372  *
1373  * TODO: the current implementation is the absolute minimum required for
1374  * dhcpcd(8) to function somewhat properly, but there is much more to be
1375  * decided and done when it comes to dealing with status changes..
1376  */
1377 void
1378 ifaddr_v6_set_up(struct ifdev * ifdev)
1379 {
1380 
1381 	if (!ifdev_is_loopback(ifdev) &&
1382 	    !ip6_addr_isinvalid(ifdev->ifdev_v6state[0]))
1383 		netif_ip6_addr_set_state(ifdev_get_netif(ifdev), 0,
1384 		    IP6_ADDR_TENTATIVE);
1385 }
1386 
1387 /*
1388  * Check whether all conditions are met for (re)assigning a link-local IPv6
1389  * address, and if so, do just that.
1390  */
1391 void
1392 ifaddr_v6_set_linklocal(struct ifdev * ifdev)
1393 {
1394 
1395 	/*
1396 	 * A few conditions must be met for link-local address assignment.
1397 	 * First of all, link-local address assignment must be enabled both
1398 	 * globally and on the interface.  The BSDs use the global setting as
1399 	 * an initial value for the link-local setting, but if we do this, it
1400 	 * would basically be impossible to change the global setting and have
1401 	 * any effect.  Thus, we use the global setting as an additional
1402 	 * requirement, with as reasoning that people will typically disable
1403 	 * the global setting in order to assign no IPv6 addresses at all.
1404 	 */
1405 	if (!(ifdev_get_nd6flags(ifdev) & ND6_IFF_AUTO_LINKLOCAL) ||
1406 	    !ifaddr_auto_linklocal)
1407 		return;
1408 
1409 	/*
1410 	 * Second, the interface must be up.  This is an artificial requirement
1411 	 * that allows for the above settings to be changed at all: if we
1412 	 * assigned a link-local address as soon as we could (see below), this
1413 	 * would leave virtually no opportunity to change the settings.  Once
1414 	 * assigned, a link-local address is never removed automatically.
1415 	 */
1416 	if (!ifdev_is_up(ifdev))
1417 		return;
1418 
1419 	/*
1420 	 * A proper (48-bit) hardware address must be set.  Interfaces without
1421 	 * hardware addresses (e.g., loopback devices) do not have this kind of
1422 	 * auto-assignment.  It may take a while for the driver to get back to
1423 	 * us with its initial hardware address, so wait for at least that.
1424 	 * Also update the link-local address upon subsequent (user-initiated)
1425 	 * changes to the hardware address, as long as if the IPv6 address has
1426 	 * not been overridden by userland by then.
1427 	 */
1428 	if (ifdev_get_hwlen(ifdev) != ETHARP_HWADDR_LEN ||
1429 	    !(ifdev->ifdev_hwlist[0].ifhwa_flags & IFHWAF_VALID))
1430 		return;
1431 
1432 	if (!ip6_addr_isinvalid(ifdev->ifdev_v6state[0]) &&
1433 	    (ifdev->ifdev_v6flags[0] & IFADDR_V6F_HWBASED))
1434 		return;
1435 
1436 	/*
1437 	 * All conditions are met.  Set or replace the interface's IPv6
1438 	 * link-local address.  This uses the first IPv6 address slot, which
1439 	 * will be skipped when adding non-link-local addresses.  We first
1440 	 * delete the old address if any, in order to force invalidation of
1441 	 * bound sockets, because setting the new address does not (currently)
1442 	 * consider sockets.
1443 	 */
1444 	if (!ip6_addr_isinvalid(ifdev->ifdev_v6state[0]))
1445 		ifaddr_v6_del(ifdev, (ifaddr_v6_num_t)0);
1446 
1447 #ifdef INET6
1448 	ifdev->ifdev_v6flags[0] = IFADDR_V6F_HWBASED;
1449 	ifdev->ifdev_v6prefix[0] = 64;
1450 	netif_create_ip6_linklocal_address(ifdev_get_netif(ifdev),
1451 	    1 /*from_mac_48bit*/);
1452 	assert(!ip6_addr_isinvalid(ifdev->ifdev_v6state[0]));
1453 
1454 	ifdev->ifdev_v6scope[0] =
1455 	    addrpol_get_scope(netif_ip_addr6(ifdev_get_netif(ifdev), 0),
1456 	      TRUE /*is_src*/);
1457 #endif /* INET6 */
1458 }
1459 
1460 /*
1461  * Return the first interface device that owns the given (non-any) IPv6
1462  * address, or NULL if it is not a valid local IPv6 address.  Addresses that
1463  * exist but are not usable ("usually assigned" in the RFC4862 sense) are
1464  * considered not valid in this context.
1465  */
1466 struct ifdev *
1467 ifaddr_v6_map_by_addr(const ip6_addr_t * ip6addr)
1468 {
1469 	struct ifdev *ifdev;
1470 	struct netif *netif;
1471 	ifaddr_v6_num_t num;
1472 
1473 	/*
1474 	 * It would be nice to be able to do a route lookup on an RTF_LOCAL
1475 	 * entry here, but this approach would currently have two problems.
1476 	 *
1477 	 * 1) link-local addresses would require a lookup with a different
1478 	 *    embedded zone for each possible interface, requiring a loop over
1479 	 *    all interfaces after all; we could do a route lookup for global
1480 	 *    addresses only, but then there's also the issue that..
1481 	 * 2) once we get the interface from the route, we still have to check
1482 	 *    check the state of the address, as done below, and that requires
1483 	 *    us to go through all the interface addresses after all; we could
1484 	 *    embed the local address number in the RTF_LOCAL routing entry but
1485 	 *    that would get rather messy API-wise.
1486 	 *
1487 	 * Still, if it turns out that this function is a bottleneck, the above
1488 	 * workarounds should offer a way forward for the common case.
1489 	 */
1490 	for (ifdev = NULL; (ifdev = ifdev_enum(ifdev)) != NULL; ) {
1491 		netif = ifdev_get_netif(ifdev);
1492 
1493 		for (num = 0; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
1494 			if (ip6_addr_isinvalid(ifdev->ifdev_v6state[num]))
1495 				continue;
1496 
1497 			/*
1498 			 * An address may be used as a local address only if it
1499 			 * is preferred or deprecated, not if it is tentative
1500 			 * or duplicated.
1501 			 */
1502 			if (!ifaddr_v6_isvalid(ifdev, num))
1503 				continue;
1504 
1505 			/*
1506 			 * Ignore the zone if the given address does not have
1507 			 * one set.  Otherwise, the zone must match.
1508 			 */
1509 			if (ip6_addr_cmp_zoneless(netif_ip6_addr(netif, num),
1510 			    ip6addr) && (!ip6_addr_has_zone(ip6addr) ||
1511 			    ip6_addr_test_zone(ip6addr, netif)))
1512 				return ifdev;
1513 		}
1514 	}
1515 
1516 	return NULL;
1517 }
1518 
1519 /*
1520  * Return the first interface device for which the given IPv6 address is on a
1521  * configured local subnet, or NULL if no match was found.
1522  */
1523 static struct ifdev *
1524 ifaddr_v6_map_by_subnet(const ip_addr_t * ipaddr)
1525 {
1526 	const ip_addr_t *addr;
1527 	struct ifdev *ifdev;
1528 	struct netif *netif;
1529 	ifaddr_v6_num_t num;
1530 	unsigned int prefix;
1531 
1532 	assert(IP_IS_V6(ipaddr));
1533 
1534 	for (ifdev = NULL; (ifdev = ifdev_enum(ifdev)) != NULL; ) {
1535 		netif = ifdev_get_netif(ifdev);
1536 
1537 		if (ip6_addr_has_zone(ip_2_ip6(ipaddr)) &&
1538 		    !ip6_addr_test_zone(ip_2_ip6(ipaddr), netif))
1539 			continue;
1540 
1541 		for (num = 0; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
1542 			if (ip6_addr_isinvalid(ifdev->ifdev_v6state[num]))
1543 				continue;
1544 
1545 			if (!ifaddr_v6_isvalid(ifdev, num))
1546 				continue;
1547 
1548 			addr = netif_ip_addr6(netif, num);
1549 
1550 			/*
1551 			 * For addresses with no implied subnet, check against
1552 			 * the full address, so as to match only that address.
1553 			 */
1554 			if (ifdev->ifdev_v6flags[num] & IFADDR_V6F_AUTOCONF)
1555 				prefix = IP6_BITS;
1556 			else
1557 				prefix = ifdev->ifdev_v6prefix[num];
1558 
1559 			if (addr_get_common_bits(ipaddr, addr, prefix) ==
1560 			    prefix)
1561 				return ifdev;
1562 		}
1563 	}
1564 
1565 	return NULL;
1566 }
1567 
1568 /*
1569  * Select an IPv6 source address for communication to the given destination
1570  * address on the given interface.  Return the selected source address, or NULL
1571  * if no appropriate source address could be found.  This function implements
1572  * RFC 6724 Sec. 5, and is very close to a drop-in replacement for lwIP's own
1573  * ip6_select_source_address() function.  We can do a slightly better job
1574  * because we have more information (for Rules 6 and 7) and can offer a more
1575  * complete, less lightweight implementation (for Rule 8).
1576  *
1577  * In summary, this is the implementation status of the rules:
1578  *
1579  * - Rules 1, 2, 3: fully implemented
1580  * - Rules 4, 5, 5.5: not applicable
1581  * - Rules 6, 7, 8: fully implemented
1582  *
1583  * Note that for rule 2, scope decisions are left to the addrpol module, which
1584  * makes a deliberate exception from the RFC for Unique-Local Addresses.
1585  *
1586  * The given destination address may not be properly zoned.
1587  */
1588 static const ip_addr_t *
1589 ifaddr_v6_select(struct ifdev * ifdev, const ip_addr_t * dest_addr)
1590 {
1591 	const ip_addr_t *cand_addr, *best_addr;
1592 	int dest_scope, cand_scope, best_scope;
1593 	int dest_label, cand_label, best_label = 0 /*gcc*/;
1594 	uint8_t cand_pref, best_pref = 0 /*gcc*/;
1595 	uint8_t cand_temp, best_temp = 0 /*gcc*/;
1596 	int cand_bits, best_bits = 0 /*gcc*/;
1597 	ifaddr_v6_num_t num, best_num;
1598 
1599 	assert(ifdev != NULL);
1600 	assert(IP_IS_V6(dest_addr));
1601 
1602 	dest_scope = addrpol_get_scope(dest_addr, FALSE /*is_src*/);
1603 	dest_label = -1; /* obtain only when necessary */
1604 
1605 	best_addr = NULL;
1606 	best_num = -1;
1607 
1608 	for (num = 0; num < LWIP_IPV6_NUM_ADDRESSES; num++) {
1609 		/* Consider only valid (preferred and deprecated) addresses. */
1610 		if (!ip6_addr_isvalid(ifdev->ifdev_v6state[num]))
1611 			continue;
1612 
1613 		cand_addr = netif_ip_addr6(ifdev_get_netif(ifdev), (int)num);
1614 
1615 		/* Rule 1 */
1616 		if (ip6_addr_cmp_zoneless(ip_2_ip6(cand_addr),
1617 		    ip_2_ip6(dest_addr)))
1618 			return cand_addr;
1619 
1620 		cand_scope = ifdev->ifdev_v6scope[num];
1621 		cand_pref = ip6_addr_ispreferred(ifdev->ifdev_v6state[num]);
1622 		cand_temp = (ifdev->ifdev_v6flags[num] & IFADDR_V6F_TEMPORARY);
1623 		cand_label = -1;
1624 		cand_bits = -1;
1625 
1626 		/*
1627 		 * The following monster of an if-condition relies on order of
1628 		 * evaluation to obtain the more expensive-to-compute values
1629 		 * only when strictly necessary.  We use a shortcut for Rule 6:
1630 		 * labels are computed based on longest matching prefix, so if
1631 		 * Rule 6 prefers the candidate address, Rule 8 would have
1632 		 * preferred the candidate address as well.  Therefore, skip
1633 		 * even computing labels when Rule 7 would not prefer either
1634 		 * address, i.e. the "temporary" state of the candidate and the
1635 		 * best address are equal.  For complete ties (which exist,
1636 		 * because Rule 8 - longest common prefix - checks up to the
1637 		 * subnet size), as "policy" we always pick the first address.
1638 		 */
1639 #define ADDRPOL_GET_LABEL(addr, label) \
1640 	(label != -1 || (label = addrpol_get_label(addr), 1))
1641 #define ADDR_GET_COMMON_BITS(addr1, addr2, num, bits) \
1642 	(bits != -1 || (bits = (int) \
1643 	addr_get_common_bits(addr1, addr2, ifdev->ifdev_v6prefix[num]), 1))
1644 
1645 		if (best_addr == NULL || /* no alternative yet */
1646 		    /* Rule 2 */
1647 		    (cand_scope < best_scope && cand_scope >= dest_scope) ||
1648 		    (cand_scope > best_scope && best_scope < dest_scope) ||
1649 		    (cand_scope == best_scope &&
1650 		    /* Rule 3 */
1651 		    (cand_pref > best_pref || (cand_pref == best_pref &&
1652 		    /* Rule 6 */
1653 		    ((cand_temp != best_temp && /* shortcut, part 1 */
1654 		    ADDRPOL_GET_LABEL(dest_addr, dest_label) &&
1655 		    ADDRPOL_GET_LABEL(cand_addr, cand_label) &&
1656 		    ADDRPOL_GET_LABEL(best_addr, best_label) &&
1657 		    cand_label == dest_label && best_label != dest_label) ||
1658 		    ((cand_temp == best_temp || /* shortcut, part 2 */
1659 		    ((cand_label == dest_label) ==
1660 		      (best_label == dest_label))) &&
1661 		    /* Rule 7 */
1662 		    (cand_temp > best_temp || (cand_temp == best_temp &&
1663 		    /* Rule 8 */
1664 		    ADDR_GET_COMMON_BITS(cand_addr, dest_addr, num,
1665 		      cand_bits) &&
1666 		    ADDR_GET_COMMON_BITS(best_addr, dest_addr, best_num,
1667 		      best_bits) &&
1668 		    cand_bits > best_bits)))))))) {
1669 			/* We found a new "winning" candidate. */
1670 			best_addr = cand_addr;
1671 			best_scope = cand_scope;
1672 			best_pref = cand_pref;
1673 			best_temp = cand_temp;
1674 			best_label = cand_label;
1675 			best_bits = cand_bits;
1676 			best_num = num;
1677 		}
1678 	}
1679 
1680 	/* Return the best candidate, if any. */
1681 	return best_addr;
1682 }
1683 
1684 /*
1685  * Pick an IPv6 source address locally assigned to the given interface, for use
1686  * with the given IPv6 destination address.  See ifaddr_v6_select() on why we
1687  * override lwIP's version of this function.
1688  *
1689  * This is a full replacement of the corresponding lwIP function, which should
1690  * be overridden with weak symbols, using patches against the lwIP source code.
1691  * As such, the lwIP headers should already provide the correct prototype for
1692  * this function.  If not, something will have changed in the lwIP
1693  * implementation, and this code must be revised accordingly.
1694  *
1695  * Important: there are currently no tests that will detect that overriding is
1696  * broken, since our test code (necessarily) uses the code path that calls
1697  * ifaddr_v6_select() directly, even though there are other places in the lwIP
1698  * source code that explicitly call this functions.
1699  */
1700 const ip_addr_t *
1701 ip6_select_source_address(struct netif * netif, const ip6_addr_t * dest_addr)
1702 {
1703 	ip_addr_t ipaddr;
1704 
1705 	ip_addr_copy_from_ip6(ipaddr, *dest_addr);
1706 
1707 	return ifaddr_v6_select(netif_get_ifdev(netif), &ipaddr);
1708 }
1709 
1710 /*
1711  * Find and return the interface to which the given address is assigned as a
1712  * local (source) address, or NULL if the given address is not a local address
1713  * for any interface.  The 'any' address as well as IPv4-mapped IPv6 addresses
1714  * are not supported and will yield NULL.
1715  */
1716 struct ifdev *
1717 ifaddr_map_by_addr(const ip_addr_t * ipaddr)
1718 {
1719 
1720 	switch (IP_GET_TYPE(ipaddr)) {
1721 	case IPADDR_TYPE_V4:
1722 		return ifaddr_v4_map_by_addr(ip_2_ip4(ipaddr));
1723 
1724 	case IPADDR_TYPE_V6:
1725 		if (ip6_addr_isipv4mappedipv6(ip_2_ip6(ipaddr)))
1726 			return NULL;
1727 
1728 		return ifaddr_v6_map_by_addr(ip_2_ip6(ipaddr));
1729 
1730 	case IPADDR_TYPE_ANY:
1731 		return NULL;
1732 
1733 	default:
1734 		panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr));
1735 	}
1736 }
1737 
1738 /*
1739  * Find and return an interface that has a local network configured that
1740  * contains the given address, or NULL if there is no match.  If there are
1741  * multiple matches, an arbitrary one is returned.  The 'any' address as well
1742  * as IPv4-mapped IPv6 addresses are not supported and will yield NULL.
1743  */
1744 struct ifdev *
1745 ifaddr_map_by_subnet(const ip_addr_t * ipaddr)
1746 {
1747 
1748 	switch (IP_GET_TYPE(ipaddr)) {
1749 	case IPADDR_TYPE_V4:
1750 		return ifaddr_v4_map_by_subnet(ip_2_ip4(ipaddr));
1751 
1752 	case IPADDR_TYPE_V6:
1753 		if (ip6_addr_isipv4mappedipv6(ip_2_ip6(ipaddr)))
1754 			return NULL;
1755 
1756 		return ifaddr_v6_map_by_subnet(ipaddr);
1757 
1758 	case IPADDR_TYPE_ANY:
1759 		return NULL;
1760 
1761 	default:
1762 		panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr));
1763 	}
1764 }
1765 
1766 /*
1767  * Select a local address to use as source address for the given destination
1768  * address.  If 'ifdev' is not NULL, it points to the interface from which to
1769  * select a source address.  If 'ifdev' is NULL, this function will attempt to
1770  * select an interface as well.  On success, return the selected source
1771  * address, and if 'ifdevp' is not NULL, store the selected interface in it.
1772  * On failure, return NULL.
1773  */
1774 const ip_addr_t *
1775 ifaddr_select(const ip_addr_t * dst_addr, struct ifdev * ifdev,
1776 	struct ifdev ** ifdevp)
1777 {
1778 	struct route_entry *route;
1779 	const ip6_addr_t *ip6addr;
1780 
1781 	/*
1782 	 * If no interface is provided yet, start by determining the interface.
1783 	 * If the destination address has a zone, this step is easy.  Otherwise
1784 	 * we have to do a routing query on the destination address.
1785 	 */
1786 	if (ifdev == NULL) {
1787 		ip6addr = ip_2_ip6(dst_addr);
1788 
1789 		if (IP_IS_V6(dst_addr) && ip6_addr_has_zone(ip6addr)) {
1790 			ifdev = ifdev_get_by_index(ip6_addr_zone(ip6addr));
1791 
1792 			if (ifdev == NULL)
1793 				return NULL;
1794 		} else {
1795 			if ((route = route_lookup(dst_addr)) == NULL)
1796 				return NULL;
1797 
1798 			ifdev = route_get_ifdev(route);
1799 		}
1800 	}
1801 
1802 	if (ifdevp != NULL)
1803 		*ifdevp = ifdev;
1804 
1805 	/*
1806 	 * We have found an interface.  Now select an IP address assigned to
1807 	 * that interface.  For IPv4, this is easy: each interface has only one
1808 	 * local address (if that).  For IPv6, we may have to select one of the
1809 	 * locally assigned addresses: global, link-local, etc.
1810 	 */
1811 	switch (IP_GET_TYPE(dst_addr)) {
1812 	case IPADDR_TYPE_V4:
1813 		/* Use the IPv4 source address if one is set at all. */
1814 		if (!ifdev->ifdev_v4set)
1815 			return FALSE;
1816 
1817 		return netif_ip_addr4(ifdev_get_netif(ifdev));
1818 
1819 	case IPADDR_TYPE_V6:
1820 		return ifaddr_v6_select(ifdev, dst_addr);
1821 
1822 	default:
1823 		panic("unknown IP address type: %u", IP_GET_TYPE(dst_addr));
1824 	}
1825 }
1826 
1827 /*
1828  * Check the given IPv6 address for a zone violation against the given
1829  * interface--that is, a scoped address leaving its original zone if used in
1830  * the context of the interface.  Return TRUE if the address is zone-
1831  * incompatible with the interface, and thus must not be used in packets sent
1832  * to that interface.  Return FALSE if there is no such zone incompatibility.
1833  */
1834 int
1835 ifaddr_is_zone_mismatch(const ip6_addr_t * ipaddr, struct ifdev * ifdev)
1836 {
1837 
1838 	/*
1839 	 * The IPv6 loopback address (::1) has an implicit link-local scope,
1840 	 * with a zone corresponding to the interface it is assigned to.  We
1841 	 * take a shortcut by assuming that the loopback address is assigned to
1842 	 * the primary loopback interface.
1843 	 */
1844 	if (ip6_addr_isloopback(ipaddr))
1845 		return (ifdev != ifdev_get_loopback());
1846 
1847 	/* Zoned addresses must not leave their zone. */
1848 	if (ip6_addr_has_zone(ipaddr))
1849 		return !ip6_addr_test_zone(ipaddr, ifdev_get_netif(ifdev));
1850 
1851 	return FALSE;
1852 }
1853 
1854 /*
1855  * Find a data link (hardware) address locally assigned to a interface.  The
1856  * address is given as 'addr', and the length of the memory area that contains
1857  * 'addr' is given as 'addr_len'.  The interface is given as 'ifdev'.  On
1858  * success, return OK, with the data link address number stored in 'num'.  For
1859  * interfaces that do not support hardware addresses, if the given address
1860  * provides a zero-length hardware address, always return successfully with 0
1861  * stored in 'nump'.  On failure, return a negative error code.
1862  */
1863 int
1864 ifaddr_dl_find(struct ifdev * ifdev, const struct sockaddr_dlx * addr,
1865 	socklen_t addr_len, ifaddr_dl_num_t * nump)
1866 {
1867 	uint8_t hwaddr[NETIF_MAX_HWADDR_LEN];
1868 	ifaddr_dl_num_t num;
1869 	int r;
1870 
1871 	if ((r = addr_get_link((const struct sockaddr *)addr, addr_len,
1872 	    NULL /*name*/, 0 /*name_max*/, hwaddr,
1873 	    ifdev_get_hwlen(ifdev))) != OK)
1874 		return r;
1875 
1876 	/*
1877 	 * For interfaces without hardware addresses, after passing the above
1878 	 * sanity checks (which guarantee that the searched-for address is of
1879 	 * zero length), return the pseudo-entry zero, which yields an entry
1880 	 * with a zero-sized hardware address once obtained.  This is required
1881 	 * for at least ifconfig(8).
1882 	 */
1883 	if (ifdev->ifdev_ops->iop_set_hwaddr == NULL) {
1884 		*nump = 0;
1885 		return OK;
1886 	}
1887 
1888 	for (num = 0; (size_t)num < __arraycount(ifdev->ifdev_hwlist); num++) {
1889 		if ((ifdev->ifdev_hwlist[num].ifhwa_flags & IFHWAF_VALID) &&
1890 		    !memcmp(ifdev->ifdev_hwlist[num].ifhwa_addr, hwaddr,
1891 		    ifdev_get_hwlen(ifdev))) {
1892 			*nump = num;
1893 			return OK;
1894 		}
1895 	}
1896 
1897 	return EADDRNOTAVAIL;
1898 }
1899 
1900 /*
1901  * Enumerate data link (hardware) addresses locally assigned to the given
1902  * interface 'ifdev'.  The caller should set 'nump' to 0 initially, and
1903  * increase it by one between a successful call and the next enumeration call.
1904  * Return TRUE on success, meaning that starting from the given value of 'nump'
1905  * there is at least one data link address, of which the number is stored in
1906  * 'nump' on return.  Return FALSE if there are no more data link addresses
1907  * locally assigned to the interface.
1908  */
1909 int
1910 ifaddr_dl_enum(struct ifdev * ifdev, ifaddr_dl_num_t * num)
1911 {
1912 
1913 	/*
1914 	 * If hardware addresses are not supported, or if no hardware address
1915 	 * has been added to this interface yet (this shouldn't happen but
1916 	 * still), there is always one entry with a (zero-sized) address.
1917 	 * That is required for the IFP (name) entry as used by getifaddrs(3).
1918 	 */
1919 	if (ifdev->ifdev_ops->iop_set_hwaddr == NULL ||
1920 	    !(ifdev->ifdev_hwlist[0].ifhwa_flags & IFHWAF_VALID))
1921 		return (*num == 0);
1922 
1923 	for (; (size_t)*num < __arraycount(ifdev->ifdev_hwlist); (*num)++) {
1924 		if (ifdev->ifdev_hwlist[*num].ifhwa_flags & IFHWAF_VALID)
1925 			return TRUE;
1926 	}
1927 
1928 	return FALSE;
1929 }
1930 
1931 /*
1932  * Retrieve a data link (hardware) address for an interface.  For interfaces
1933  * that support hardware addresses, 'num' must be a number returned by
1934  * ifaddr_dl_find() or ifaddr_dl_enum().  For others, 'num' must be zero, and a
1935  * pseudo-address of zero size will be returned.  The address will be stored in
1936  * 'addr'.  This function always succeeds.
1937  */
1938 void
1939 ifaddr_dl_get(struct ifdev * ifdev, ifaddr_dl_num_t num,
1940 	struct sockaddr_dlx * addr)
1941 {
1942 	const uint8_t *hwaddr;
1943 	size_t hwaddr_len;
1944 	socklen_t addr_len;
1945 
1946 	if ((hwaddr_len = ifdev_get_hwlen(ifdev)) > 0) {
1947 		/*
1948 		 * Note that if we have no hardware addresses yet (which should
1949 		 * not happen but still), the first entry may not be marked as
1950 		 * valid yet.  Ignore it, and return an all-zeroes address.
1951 		 */
1952 		hwaddr = ifdev->ifdev_hwlist[num].ifhwa_addr;
1953 	} else
1954 		hwaddr = NULL;
1955 
1956 	addr_len = sizeof(*addr);
1957 
1958 	addr_put_link((struct sockaddr *)addr, &addr_len,
1959 	    ifdev_get_index(ifdev), ifdev_get_iftype(ifdev),
1960 	    ifdev_get_name(ifdev), hwaddr, hwaddr_len);
1961 }
1962 
1963 /*
1964  * Obtain NetBSD-style state flags (IFLR_) for the given local data link
1965  * address.  The given number may be 0, in which case that slot's state may not
1966  * be valid.  Otherwise, the given number must identify an existing address.
1967  * Return the flags, 0 if the slot was not valid.
1968  */
1969 int
1970 ifaddr_dl_get_flags(struct ifdev * ifdev, ifaddr_dl_num_t num)
1971 {
1972 	int flags;
1973 
1974 	assert(num >= 0 && (size_t)num < __arraycount(ifdev->ifdev_hwlist));
1975 
1976 	if (!(ifdev->ifdev_hwlist[num].ifhwa_flags & IFHWAF_VALID))
1977 		return 0;
1978 
1979 	flags = (num == 0) ? IFLR_ACTIVE : 0;
1980 
1981 	if (ifdev->ifdev_hwlist[num].ifhwa_flags & IFHWAF_FACTORY)
1982 		flags |= IFLR_FACTORY;
1983 
1984 	return flags;
1985 }
1986 
1987 /*
1988  * Scan the list of hardware addresses of the given interface for a particular
1989  * hardware address, as well as for an available entry.  Return the entry found
1990  * or -1 if the given hardware address was not found.  Independently, return an
1991  * available entry in 'availp' or -1 if no entries are available.
1992  */
1993 static ifaddr_dl_num_t
1994 ifaddr_dl_scan(struct ifdev * ifdev, const uint8_t * hwaddr,
1995 	ifaddr_dl_num_t * availp)
1996 {
1997 	ifaddr_dl_num_t num, found, avail;
1998 
1999 	found = avail = -1;
2000 
2001 	for (num = 0; (size_t)num < __arraycount(ifdev->ifdev_hwlist); num++) {
2002 		if (!(ifdev->ifdev_hwlist[num].ifhwa_flags & IFHWAF_VALID)) {
2003 			if (avail == -1)
2004 				avail = num;
2005 		} else if (!memcmp(ifdev->ifdev_hwlist[num].ifhwa_addr, hwaddr,
2006 		    ifdev_get_hwlen(ifdev)))
2007 			found = num;
2008 	}
2009 
2010 	*availp = avail;
2011 	return found;
2012 }
2013 
2014 /*
2015  * Set a hardware address entry in the hardware address list of the given
2016  * interface.
2017  */
2018 static void
2019 ifaddr_dl_set(struct ifdev * ifdev, ifaddr_dl_num_t num,
2020 	const uint8_t * hwaddr, int is_factory)
2021 {
2022 
2023 	memcpy(&ifdev->ifdev_hwlist[num].ifhwa_addr, hwaddr,
2024 	    ifdev_get_hwlen(ifdev));
2025 
2026 	ifdev->ifdev_hwlist[num].ifhwa_flags = IFHWAF_VALID;
2027 	if (is_factory)
2028 		ifdev->ifdev_hwlist[num].ifhwa_flags |= IFHWAF_FACTORY;
2029 
2030 	rtsock_msg_addr_dl(ifdev, RTM_NEWADDR, num);
2031 }
2032 
2033 /*
2034  * Mark a new hardware address as active, after it has already been activated
2035  * on the hardware and in local administration.  The active slot is always slot
2036  * zero, so swap slots if needed.
2037  */
2038 static void
2039 ifaddr_dl_activate(struct ifdev * ifdev, ifaddr_dl_num_t num)
2040 {
2041 	struct ifdev_hwaddr tmp;
2042 	struct netif *netif;
2043 	size_t sz;
2044 
2045 	assert(num != -1);
2046 
2047 	/* The given slot may be zero if this is the initial address. */
2048 	if (num != 0) {
2049 		sz = sizeof(tmp);
2050 		memcpy(&tmp, &ifdev->ifdev_hwlist[0], sz);
2051 		memcpy(&ifdev->ifdev_hwlist[0], &ifdev->ifdev_hwlist[num], sz);
2052 		memcpy(&ifdev->ifdev_hwlist[num], &tmp, sz);
2053 	}
2054 
2055 	netif = ifdev_get_netif(ifdev);
2056 
2057 	/* Tell lwIP and routing sockets. */
2058 	memcpy(&netif->hwaddr, &ifdev->ifdev_hwlist[0].ifhwa_addr,
2059 	    ifdev_get_hwlen(ifdev));
2060 
2061 	rtsock_msg_addr_dl(ifdev, RTM_CHGADDR, 0);
2062 
2063 	/* See if we can and should generate a link-local IPv6 address now. */
2064 	ifaddr_v6_set_linklocal(ifdev);
2065 }
2066 
2067 /*
2068  * Add a data link (hardware) address to an interface, or if it already exists,
2069  * update its associated flags (IFLR_).
2070  */
2071 int
2072 ifaddr_dl_add(struct ifdev * ifdev, const struct sockaddr_dlx * addr,
2073 	socklen_t addr_len, int flags)
2074 {
2075 	uint8_t hwaddr[NETIF_MAX_HWADDR_LEN];
2076 	ifaddr_dl_num_t found, avail;
2077 	int r;
2078 
2079 	/*
2080 	 * If this interface type does not support setting hardware addresses,
2081 	 * refuse the call.  If the interface type supports it but the
2082 	 * underlying hardware does not, we cannot report failure here, though.
2083 	 * In that case, attempts to activate an address will fail instead.
2084 	 */
2085 	if (ifdev->ifdev_ops->iop_set_hwaddr == NULL)
2086 		return EINVAL;
2087 
2088 	if ((r = addr_get_link((const struct sockaddr *)addr, addr_len,
2089 	    NULL /*name*/, 0 /*name_max*/, hwaddr,
2090 	    ifdev_get_hwlen(ifdev))) != OK)
2091 		return r;
2092 
2093 	/*
2094 	 * Find the slot for the given hardware address.  Also find the slot of
2095 	 * the active address, and a free slot.  All of these may not exist.
2096 	 */
2097 	found = ifaddr_dl_scan(ifdev, hwaddr, &avail);
2098 
2099 	if (found == -1) {
2100 		if (avail == -1)
2101 			return ENOBUFS;	/* TODO: a better error code */
2102 		found = avail;
2103 	}
2104 
2105 	/*
2106 	 * If we are asked to activate this address, try that first: this may
2107 	 * fail if the network device does not support setting addresses, in
2108 	 * which case we want to fail without causing routing socket noise.
2109 	 */
2110 	if ((flags & IFLR_ACTIVE) && found != 0 &&
2111 	    (r = ifdev->ifdev_ops->iop_set_hwaddr(ifdev, hwaddr)) != OK)
2112 		return r;
2113 
2114 	/*
2115 	 * If this is a new address, add and announce it.  Otherwise, just
2116 	 * update its flags.
2117 	 */
2118 	if (found == avail) {
2119 		ifaddr_dl_set(ifdev, found, hwaddr,
2120 		    (flags & IFLR_FACTORY));
2121 	} else {
2122 		ifdev->ifdev_hwlist[found].ifhwa_flags &= ~IFLR_FACTORY;
2123 		if (flags & IFLR_FACTORY)
2124 			ifdev->ifdev_hwlist[found].ifhwa_flags |= IFLR_FACTORY;
2125 	}
2126 
2127 	/*
2128 	 * Activate the address if requested, swapping slots as needed.  It is
2129 	 * not possible to deactivate the active address by changing its flags.
2130 	 */
2131 	if ((flags & IFLR_ACTIVE) && found != 0)
2132 		ifaddr_dl_activate(ifdev, found);
2133 
2134 	return OK;
2135 }
2136 
2137 /*
2138  * Delete a data link (hardware) address from an interface.
2139  */
2140 int
2141 ifaddr_dl_del(struct ifdev * ifdev, ifaddr_dl_num_t num)
2142 {
2143 
2144 	if (ifdev->ifdev_ops->iop_set_hwaddr == NULL)
2145 		return EINVAL;
2146 
2147 	assert(num >= 0 && (size_t)num < __arraycount(ifdev->ifdev_hwlist));
2148 	assert(ifdev->ifdev_hwlist[num].ifhwa_flags & IFHWAF_VALID);
2149 
2150 	/* It is not possible to delete the active address. */
2151 	if (num == 0)
2152 		return EBUSY;
2153 
2154 	rtsock_msg_addr_dl(ifdev, RTM_DELADDR, num);
2155 
2156 	ifdev->ifdev_hwlist[num].ifhwa_flags = 0;
2157 
2158 	return OK;
2159 }
2160 
2161 /*
2162  * Announce all data link (hardware) addresses associated with the given
2163  * interface as deleted, including the active address.  Used (only) right
2164  * before the interface is destroyed.
2165  */
2166 void
2167 ifaddr_dl_clear(struct ifdev * ifdev)
2168 {
2169 	ifaddr_dl_num_t num;
2170 
2171 	/*
2172 	 * Do the active address last, because all announcements carry the
2173 	 * active address's hardware address as well.
2174 	 */
2175 	for (num = 1; ifaddr_dl_enum(ifdev, &num); num++)
2176 		rtsock_msg_addr_dl(ifdev, RTM_DELADDR, num);
2177 
2178 	if (ifdev->ifdev_hwlist[0].ifhwa_flags & IFHWAF_VALID)
2179 		rtsock_msg_addr_dl(ifdev, RTM_DELADDR, (ifaddr_dl_num_t)0);
2180 }
2181 
2182 /*
2183  * Update the interface's active hardware address.  If the 'is_factory' flag is
2184  * set, the address is the factory (driver-given) address.  This function may
2185  * only be called from ifdev_update_hwaddr().
2186  */
2187 void
2188 ifaddr_dl_update(struct ifdev * ifdev, const uint8_t * hwaddr, int is_factory)
2189 {
2190 	ifaddr_dl_num_t found, avail;
2191 
2192 	/*
2193 	 * Find the slot for the given hardware address.  Also find the slot of
2194 	 * the active address, and a free slot.  All of these may not exist.
2195 	 */
2196 	found = ifaddr_dl_scan(ifdev, hwaddr, &avail);
2197 
2198 	/* If the given address is already the active one, do nothing. */
2199 	if (found == 0) {
2200 		/* Factory addresses are always added first! */
2201 		assert(!is_factory);
2202 
2203 		return;
2204 	}
2205 
2206 	if (found == -1) {
2207 		/*
2208 		 * If the given address is not in the list, add it.  If the
2209 		 * list is full, first remove any non-active address.  The user
2210 		 * won't like this, but it preserves correctness without too
2211 		 * many complications, because this case is unlikely to happen.
2212 		 */
2213 		if (avail == -1) {
2214 			found = 1;
2215 
2216 			(void)ifaddr_dl_del(ifdev, found);
2217 		} else
2218 			found = avail;
2219 
2220 		ifaddr_dl_set(ifdev, found, hwaddr, is_factory);
2221 	}
2222 
2223 	ifaddr_dl_activate(ifdev, found);
2224 }
2225