xref: /freebsd/sys/netinet/in.c (revision 0957b409)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (C) 2001 WIDE Project.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)in.c	8.4 (Berkeley) 1/9/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 #include "opt_mpath.h"
39 
40 #include <sys/param.h>
41 #include <sys/eventhandler.h>
42 #include <sys/systm.h>
43 #include <sys/sockio.h>
44 #include <sys/malloc.h>
45 #include <sys/priv.h>
46 #include <sys/socket.h>
47 #include <sys/jail.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/proc.h>
51 #include <sys/rmlock.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/sx.h>
55 
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_arp.h>
59 #include <net/if_dl.h>
60 #include <net/if_llatbl.h>
61 #include <net/if_types.h>
62 #include <net/route.h>
63 #include <net/vnet.h>
64 
65 #include <netinet/if_ether.h>
66 #include <netinet/in.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_pcb.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_carp.h>
71 #include <netinet/igmp_var.h>
72 #include <netinet/udp.h>
73 #include <netinet/udp_var.h>
74 
75 static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
76 static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
77 
78 static void	in_socktrim(struct sockaddr_in *);
79 static void	in_purgemaddrs(struct ifnet *);
80 
81 VNET_DEFINE_STATIC(int, nosameprefix);
82 #define	V_nosameprefix			VNET(nosameprefix)
83 SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
84 	&VNET_NAME(nosameprefix), 0,
85 	"Refuse to create same prefixes on different interfaces");
86 
87 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
88 #define	V_ripcbinfo			VNET(ripcbinfo)
89 
90 static struct sx in_control_sx;
91 SX_SYSINIT(in_control_sx, &in_control_sx, "in_control");
92 
93 /*
94  * Return 1 if an internet address is for a ``local'' host
95  * (one to which we have a connection).
96  */
97 int
98 in_localaddr(struct in_addr in)
99 {
100 	struct rm_priotracker in_ifa_tracker;
101 	u_long i = ntohl(in.s_addr);
102 	struct in_ifaddr *ia;
103 
104 	IN_IFADDR_RLOCK(&in_ifa_tracker);
105 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
106 		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
107 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
108 			return (1);
109 		}
110 	}
111 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
112 	return (0);
113 }
114 
115 /*
116  * Return 1 if an internet address is for the local host and configured
117  * on one of its interfaces.
118  */
119 int
120 in_localip(struct in_addr in)
121 {
122 	struct rm_priotracker in_ifa_tracker;
123 	struct in_ifaddr *ia;
124 
125 	IN_IFADDR_RLOCK(&in_ifa_tracker);
126 	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
127 		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
128 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
129 			return (1);
130 		}
131 	}
132 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
133 	return (0);
134 }
135 
136 /*
137  * Return 1 if an internet address is configured on an interface.
138  */
139 int
140 in_ifhasaddr(struct ifnet *ifp, struct in_addr in)
141 {
142 	struct epoch_tracker et;
143 	struct ifaddr *ifa;
144 	struct in_ifaddr *ia;
145 
146 	NET_EPOCH_ENTER(et);
147 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
148 		if (ifa->ifa_addr->sa_family != AF_INET)
149 			continue;
150 		ia = (struct in_ifaddr *)ifa;
151 		if (ia->ia_addr.sin_addr.s_addr == in.s_addr) {
152 			NET_EPOCH_EXIT(et);
153 			return (1);
154 		}
155 	}
156 	NET_EPOCH_EXIT(et);
157 
158 	return (0);
159 }
160 
161 /*
162  * Return a reference to the interface address which is different to
163  * the supplied one but with same IP address value.
164  */
165 static struct in_ifaddr *
166 in_localip_more(struct in_ifaddr *ia)
167 {
168 	struct rm_priotracker in_ifa_tracker;
169 	in_addr_t in = IA_SIN(ia)->sin_addr.s_addr;
170 	struct in_ifaddr *it;
171 
172 	IN_IFADDR_RLOCK(&in_ifa_tracker);
173 	LIST_FOREACH(it, INADDR_HASH(in), ia_hash) {
174 		if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) {
175 			ifa_ref(&it->ia_ifa);
176 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
177 			return (it);
178 		}
179 	}
180 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
181 
182 	return (NULL);
183 }
184 
185 /*
186  * Determine whether an IP address is in a reserved set of addresses
187  * that may not be forwarded, or whether datagrams to that destination
188  * may be forwarded.
189  */
190 int
191 in_canforward(struct in_addr in)
192 {
193 	u_long i = ntohl(in.s_addr);
194 	u_long net;
195 
196 	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
197 		return (0);
198 	if (IN_CLASSA(i)) {
199 		net = i & IN_CLASSA_NET;
200 		if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
201 			return (0);
202 	}
203 	return (1);
204 }
205 
206 /*
207  * Trim a mask in a sockaddr
208  */
209 static void
210 in_socktrim(struct sockaddr_in *ap)
211 {
212     char *cplim = (char *) &ap->sin_addr;
213     char *cp = (char *) (&ap->sin_addr + 1);
214 
215     ap->sin_len = 0;
216     while (--cp >= cplim)
217 	if (*cp) {
218 	    (ap)->sin_len = cp - (char *) (ap) + 1;
219 	    break;
220 	}
221 }
222 
223 /*
224  * Generic internet control operations (ioctl's).
225  */
226 int
227 in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
228     struct thread *td)
229 {
230 	struct ifreq *ifr = (struct ifreq *)data;
231 	struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr;
232 	struct epoch_tracker et;
233 	struct ifaddr *ifa;
234 	struct in_ifaddr *ia;
235 	int error;
236 
237 	if (ifp == NULL)
238 		return (EADDRNOTAVAIL);
239 
240 	/*
241 	 * Filter out 4 ioctls we implement directly.  Forward the rest
242 	 * to specific functions and ifp->if_ioctl().
243 	 */
244 	switch (cmd) {
245 	case SIOCGIFADDR:
246 	case SIOCGIFBRDADDR:
247 	case SIOCGIFDSTADDR:
248 	case SIOCGIFNETMASK:
249 		break;
250 	case SIOCDIFADDR:
251 		sx_xlock(&in_control_sx);
252 		error = in_difaddr_ioctl(cmd, data, ifp, td);
253 		sx_xunlock(&in_control_sx);
254 		return (error);
255 	case OSIOCAIFADDR:	/* 9.x compat */
256 	case SIOCAIFADDR:
257 		sx_xlock(&in_control_sx);
258 		error = in_aifaddr_ioctl(cmd, data, ifp, td);
259 		sx_xunlock(&in_control_sx);
260 		return (error);
261 	case SIOCSIFADDR:
262 	case SIOCSIFBRDADDR:
263 	case SIOCSIFDSTADDR:
264 	case SIOCSIFNETMASK:
265 		/* We no longer support that old commands. */
266 		return (EINVAL);
267 	default:
268 		if (ifp->if_ioctl == NULL)
269 			return (EOPNOTSUPP);
270 		return ((*ifp->if_ioctl)(ifp, cmd, data));
271 	}
272 
273 	if (addr->sin_addr.s_addr != INADDR_ANY &&
274 	    prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0)
275 		return (EADDRNOTAVAIL);
276 
277 	/*
278 	 * Find address for this interface, if it exists.  If an
279 	 * address was specified, find that one instead of the
280 	 * first one on the interface, if possible.
281 	 */
282 	NET_EPOCH_ENTER(et);
283 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
284 		if (ifa->ifa_addr->sa_family != AF_INET)
285 			continue;
286 		ia = (struct in_ifaddr *)ifa;
287 		if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr)
288 			break;
289 	}
290 	if (ifa == NULL)
291 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
292 			if (ifa->ifa_addr->sa_family == AF_INET) {
293 				ia = (struct in_ifaddr *)ifa;
294 				if (prison_check_ip4(td->td_ucred,
295 				    &ia->ia_addr.sin_addr) == 0)
296 					break;
297 			}
298 
299 	if (ifa == NULL) {
300 		NET_EPOCH_EXIT(et);
301 		return (EADDRNOTAVAIL);
302 	}
303 
304 	error = 0;
305 	switch (cmd) {
306 	case SIOCGIFADDR:
307 		*addr = ia->ia_addr;
308 		break;
309 
310 	case SIOCGIFBRDADDR:
311 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
312 			error = EINVAL;
313 			break;
314 		}
315 		*addr = ia->ia_broadaddr;
316 		break;
317 
318 	case SIOCGIFDSTADDR:
319 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
320 			error = EINVAL;
321 			break;
322 		}
323 		*addr = ia->ia_dstaddr;
324 		break;
325 
326 	case SIOCGIFNETMASK:
327 		*addr = ia->ia_sockmask;
328 		break;
329 	}
330 
331 	NET_EPOCH_EXIT(et);
332 
333 	return (error);
334 }
335 
336 static int
337 in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
338 {
339 	const struct in_aliasreq *ifra = (struct in_aliasreq *)data;
340 	const struct sockaddr_in *addr = &ifra->ifra_addr;
341 	const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr;
342 	const struct sockaddr_in *mask = &ifra->ifra_mask;
343 	const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr;
344 	const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0;
345 	struct epoch_tracker et;
346 	struct ifaddr *ifa;
347 	struct in_ifaddr *ia;
348 	bool iaIsFirst;
349 	int error = 0;
350 
351 	error = priv_check(td, PRIV_NET_ADDIFADDR);
352 	if (error)
353 		return (error);
354 
355 	/*
356 	 * ifra_addr must be present and be of INET family.
357 	 * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional.
358 	 */
359 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
360 	    addr->sin_family != AF_INET)
361 		return (EINVAL);
362 	if (broadaddr->sin_len != 0 &&
363 	    (broadaddr->sin_len != sizeof(struct sockaddr_in) ||
364 	    broadaddr->sin_family != AF_INET))
365 		return (EINVAL);
366 	if (mask->sin_len != 0 &&
367 	    (mask->sin_len != sizeof(struct sockaddr_in) ||
368 	    mask->sin_family != AF_INET))
369 		return (EINVAL);
370 	if ((ifp->if_flags & IFF_POINTOPOINT) &&
371 	    (dstaddr->sin_len != sizeof(struct sockaddr_in) ||
372 	     dstaddr->sin_addr.s_addr == INADDR_ANY))
373 		return (EDESTADDRREQ);
374 	if (vhid > 0 && carp_attach_p == NULL)
375 		return (EPROTONOSUPPORT);
376 
377 	/*
378 	 * See whether address already exist.
379 	 */
380 	iaIsFirst = true;
381 	ia = NULL;
382 	NET_EPOCH_ENTER(et);
383 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
384 		struct in_ifaddr *it;
385 
386 		if (ifa->ifa_addr->sa_family != AF_INET)
387 			continue;
388 
389 		it = (struct in_ifaddr *)ifa;
390 		iaIsFirst = false;
391 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
392 		    prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)
393 			ia = it;
394 	}
395 	NET_EPOCH_EXIT(et);
396 
397 	if (ia != NULL)
398 		(void )in_difaddr_ioctl(cmd, data, ifp, td);
399 
400 	ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK);
401 	ia = (struct in_ifaddr *)ifa;
402 	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
403 	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
404 	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
405 	callout_init_rw(&ia->ia_garp_timer, &ifp->if_addr_lock,
406 	    CALLOUT_RETURNUNLOCKED);
407 
408 	ia->ia_ifp = ifp;
409 	ia->ia_addr = *addr;
410 	if (mask->sin_len != 0) {
411 		ia->ia_sockmask = *mask;
412 		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
413 	} else {
414 		in_addr_t i = ntohl(addr->sin_addr.s_addr);
415 
416 		/*
417 	 	 * Be compatible with network classes, if netmask isn't
418 		 * supplied, guess it based on classes.
419 	 	 */
420 		if (IN_CLASSA(i))
421 			ia->ia_subnetmask = IN_CLASSA_NET;
422 		else if (IN_CLASSB(i))
423 			ia->ia_subnetmask = IN_CLASSB_NET;
424 		else
425 			ia->ia_subnetmask = IN_CLASSC_NET;
426 		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
427 	}
428 	ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask;
429 	in_socktrim(&ia->ia_sockmask);
430 
431 	if (ifp->if_flags & IFF_BROADCAST) {
432 		if (broadaddr->sin_len != 0) {
433 			ia->ia_broadaddr = *broadaddr;
434 		} else if (ia->ia_subnetmask == IN_RFC3021_MASK) {
435 			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
436 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
437 			ia->ia_broadaddr.sin_family = AF_INET;
438 		} else {
439 			ia->ia_broadaddr.sin_addr.s_addr =
440 			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
441 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
442 			ia->ia_broadaddr.sin_family = AF_INET;
443 		}
444 	}
445 
446 	if (ifp->if_flags & IFF_POINTOPOINT)
447 		ia->ia_dstaddr = *dstaddr;
448 
449 	/* XXXGL: rtinit() needs this strange assignment. */
450 	if (ifp->if_flags & IFF_LOOPBACK)
451                 ia->ia_dstaddr = ia->ia_addr;
452 
453 	if (vhid != 0) {
454 		error = (*carp_attach_p)(&ia->ia_ifa, vhid);
455 		if (error)
456 			return (error);
457 	}
458 
459 	/* if_addrhead is already referenced by ifa_alloc() */
460 	IF_ADDR_WLOCK(ifp);
461 	CK_STAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
462 	IF_ADDR_WUNLOCK(ifp);
463 
464 	ifa_ref(ifa);			/* in_ifaddrhead */
465 	IN_IFADDR_WLOCK();
466 	CK_STAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
467 	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
468 	IN_IFADDR_WUNLOCK();
469 
470 	/*
471 	 * Give the interface a chance to initialize
472 	 * if this is its first address,
473 	 * and to validate the address if necessary.
474 	 */
475 	if (ifp->if_ioctl != NULL) {
476 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
477 		if (error)
478 			goto fail1;
479 	}
480 
481 	/*
482 	 * Add route for the network.
483 	 */
484 	if (vhid == 0) {
485 		int flags = RTF_UP;
486 
487 		if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
488 			flags |= RTF_HOST;
489 
490 		error = in_addprefix(ia, flags);
491 		if (error)
492 			goto fail1;
493 	}
494 
495 	/*
496 	 * Add a loopback route to self.
497 	 */
498 	if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 &&
499 	    ia->ia_addr.sin_addr.s_addr != INADDR_ANY &&
500 	    !((ifp->if_flags & IFF_POINTOPOINT) &&
501 	     ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) {
502 		struct in_ifaddr *eia;
503 
504 		eia = in_localip_more(ia);
505 
506 		if (eia == NULL) {
507 			error = ifa_add_loopback_route((struct ifaddr *)ia,
508 			    (struct sockaddr *)&ia->ia_addr);
509 			if (error)
510 				goto fail2;
511 		} else
512 			ifa_free(&eia->ia_ifa);
513 	}
514 
515 	if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) {
516 		struct in_addr allhosts_addr;
517 		struct in_ifinfo *ii;
518 
519 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
520 		allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
521 
522 		error = in_joingroup(ifp, &allhosts_addr, NULL,
523 			&ii->ii_allhosts);
524 	}
525 
526 	/*
527 	 * Note: we don't need extra reference for ifa, since we called
528 	 * with sx lock held, and ifaddr can not be deleted in concurrent
529 	 * thread.
530 	 */
531 	EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, ifa, IFADDR_EVENT_ADD);
532 
533 	return (error);
534 
535 fail2:
536 	if (vhid == 0)
537 		(void )in_scrubprefix(ia, LLE_STATIC);
538 
539 fail1:
540 	if (ia->ia_ifa.ifa_carp)
541 		(*carp_detach_p)(&ia->ia_ifa, false);
542 
543 	IF_ADDR_WLOCK(ifp);
544 	CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link);
545 	IF_ADDR_WUNLOCK(ifp);
546 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
547 
548 	IN_IFADDR_WLOCK();
549 	CK_STAILQ_REMOVE(&V_in_ifaddrhead, ia, in_ifaddr, ia_link);
550 	LIST_REMOVE(ia, ia_hash);
551 	IN_IFADDR_WUNLOCK();
552 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
553 
554 	return (error);
555 }
556 
557 static int
558 in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
559 {
560 	const struct ifreq *ifr = (struct ifreq *)data;
561 	const struct sockaddr_in *addr = (const struct sockaddr_in *)
562 	    &ifr->ifr_addr;
563 	struct ifaddr *ifa;
564 	struct in_ifaddr *ia;
565 	bool deleteAny, iaIsLast;
566 	int error;
567 
568 	if (td != NULL) {
569 		error = priv_check(td, PRIV_NET_DELIFADDR);
570 		if (error)
571 			return (error);
572 	}
573 
574 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
575 	    addr->sin_family != AF_INET)
576 		deleteAny = true;
577 	else
578 		deleteAny = false;
579 
580 	iaIsLast = true;
581 	ia = NULL;
582 	IF_ADDR_WLOCK(ifp);
583 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
584 		struct in_ifaddr *it;
585 
586 		if (ifa->ifa_addr->sa_family != AF_INET)
587 			continue;
588 
589 		it = (struct in_ifaddr *)ifa;
590 		if (deleteAny && ia == NULL && (td == NULL ||
591 		    prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0))
592 			ia = it;
593 
594 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
595 		    (td == NULL || prison_check_ip4(td->td_ucred,
596 		    &addr->sin_addr) == 0))
597 			ia = it;
598 
599 		if (it != ia)
600 			iaIsLast = false;
601 	}
602 
603 	if (ia == NULL) {
604 		IF_ADDR_WUNLOCK(ifp);
605 		return (EADDRNOTAVAIL);
606 	}
607 
608 	CK_STAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifaddr, ifa_link);
609 	IF_ADDR_WUNLOCK(ifp);
610 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
611 
612 	IN_IFADDR_WLOCK();
613 	CK_STAILQ_REMOVE(&V_in_ifaddrhead, ia, in_ifaddr, ia_link);
614 	LIST_REMOVE(ia, ia_hash);
615 	IN_IFADDR_WUNLOCK();
616 
617 	/*
618 	 * in_scrubprefix() kills the interface route.
619 	 */
620 	in_scrubprefix(ia, LLE_STATIC);
621 
622 	/*
623 	 * in_ifadown gets rid of all the rest of
624 	 * the routes.  This is not quite the right
625 	 * thing to do, but at least if we are running
626 	 * a routing process they will come back.
627 	 */
628 	in_ifadown(&ia->ia_ifa, 1);
629 
630 	if (ia->ia_ifa.ifa_carp)
631 		(*carp_detach_p)(&ia->ia_ifa, cmd == SIOCAIFADDR);
632 
633 	/*
634 	 * If this is the last IPv4 address configured on this
635 	 * interface, leave the all-hosts group.
636 	 * No state-change report need be transmitted.
637 	 */
638 	if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) {
639 		struct in_ifinfo *ii;
640 
641 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
642 		if (ii->ii_allhosts) {
643 			(void)in_leavegroup(ii->ii_allhosts, NULL);
644 			ii->ii_allhosts = NULL;
645 		}
646 	}
647 
648 	IF_ADDR_WLOCK(ifp);
649 	if (callout_stop(&ia->ia_garp_timer) == 1) {
650 		ifa_free(&ia->ia_ifa);
651 	}
652 	IF_ADDR_WUNLOCK(ifp);
653 
654 	EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa,
655 	    IFADDR_EVENT_DEL);
656 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
657 
658 	return (0);
659 }
660 
/*
 * Route flags implied by the interface behind address (x): RTF_HOST when
 * the interface is loopback or point-to-point, 0 otherwise.
 */
#define rtinitflags(x) \
	(((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) \
	    ? RTF_HOST : 0)
664 
665 /*
666  * Check if we have a route for the given prefix already or add one accordingly.
667  */
668 int
669 in_addprefix(struct in_ifaddr *target, int flags)
670 {
671 	struct rm_priotracker in_ifa_tracker;
672 	struct in_ifaddr *ia;
673 	struct in_addr prefix, mask, p, m;
674 	int error;
675 
676 	if ((flags & RTF_HOST) != 0) {
677 		prefix = target->ia_dstaddr.sin_addr;
678 		mask.s_addr = 0;
679 	} else {
680 		prefix = target->ia_addr.sin_addr;
681 		mask = target->ia_sockmask.sin_addr;
682 		prefix.s_addr &= mask.s_addr;
683 	}
684 
685 	IN_IFADDR_RLOCK(&in_ifa_tracker);
686 	/* Look for an existing address with the same prefix, mask, and fib */
687 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
688 		if (rtinitflags(ia)) {
689 			p = ia->ia_dstaddr.sin_addr;
690 
691 			if (prefix.s_addr != p.s_addr)
692 				continue;
693 		} else {
694 			p = ia->ia_addr.sin_addr;
695 			m = ia->ia_sockmask.sin_addr;
696 			p.s_addr &= m.s_addr;
697 
698 			if (prefix.s_addr != p.s_addr ||
699 			    mask.s_addr != m.s_addr)
700 				continue;
701 		}
702 		if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
703 			continue;
704 
705 		/*
706 		 * If we got a matching prefix route inserted by other
707 		 * interface address, we are done here.
708 		 */
709 		if (ia->ia_flags & IFA_ROUTE) {
710 #ifdef RADIX_MPATH
711 			if (ia->ia_addr.sin_addr.s_addr ==
712 			    target->ia_addr.sin_addr.s_addr) {
713 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
714 				return (EEXIST);
715 			} else
716 				break;
717 #endif
718 			if (V_nosameprefix) {
719 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
720 				return (EEXIST);
721 			} else {
722 				int fibnum;
723 
724 				fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
725 					target->ia_ifp->if_fib;
726 				rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
727 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
728 				return (0);
729 			}
730 		}
731 	}
732 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
733 
734 	/*
735 	 * No-one seem to have this prefix route, so we try to insert it.
736 	 */
737 	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
738 	if (!error)
739 		target->ia_flags |= IFA_ROUTE;
740 	return (error);
741 }
742 
743 /*
744  * Removes either all lle entries for given @ia, or lle
745  * corresponding to @ia address.
746  */
747 static void
748 in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags)
749 {
750 	struct sockaddr_in addr, mask;
751 	struct sockaddr *saddr, *smask;
752 	struct ifnet *ifp;
753 
754 	saddr = (struct sockaddr *)&addr;
755 	bzero(&addr, sizeof(addr));
756 	addr.sin_len = sizeof(addr);
757 	addr.sin_family = AF_INET;
758 	smask = (struct sockaddr *)&mask;
759 	bzero(&mask, sizeof(mask));
760 	mask.sin_len = sizeof(mask);
761 	mask.sin_family = AF_INET;
762 	mask.sin_addr.s_addr = ia->ia_subnetmask;
763 	ifp = ia->ia_ifp;
764 
765 	if (all) {
766 
767 		/*
768 		 * Remove all L2 entries matching given prefix.
769 		 * Convert address to host representation to avoid
770 		 * doing this on every callback. ia_subnetmask is already
771 		 * stored in host representation.
772 		 */
773 		addr.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr);
774 		lltable_prefix_free(AF_INET, saddr, smask, flags);
775 	} else {
776 		/* Remove interface address only */
777 		addr.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr;
778 		lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR, saddr);
779 	}
780 }
781 
782 /*
783  * If there is no other address in the system that can serve a route to the
784  * same prefix, remove the route.  Hand over the route to the new address
785  * otherwise.
786  */
787 int
788 in_scrubprefix(struct in_ifaddr *target, u_int flags)
789 {
790 	struct rm_priotracker in_ifa_tracker;
791 	struct in_ifaddr *ia;
792 	struct in_addr prefix, mask, p, m;
793 	int error = 0;
794 
795 	/*
796 	 * Remove the loopback route to the interface address.
797 	 */
798 	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
799 	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
800 	    (flags & LLE_STATIC)) {
801 		struct in_ifaddr *eia;
802 
803 		/*
804 		 * XXXME: add fib-aware in_localip.
805 		 * We definitely don't want to switch between
806 		 * prefixes in different fibs.
807 		 */
808 		eia = in_localip_more(target);
809 
810 		if (eia != NULL) {
811 			error = ifa_switch_loopback_route((struct ifaddr *)eia,
812 			    (struct sockaddr *)&target->ia_addr);
813 			ifa_free(&eia->ia_ifa);
814 		} else {
815 			error = ifa_del_loopback_route((struct ifaddr *)target,
816 			    (struct sockaddr *)&target->ia_addr);
817 		}
818 	}
819 
820 	if (rtinitflags(target)) {
821 		prefix = target->ia_dstaddr.sin_addr;
822 		mask.s_addr = 0;
823 	} else {
824 		prefix = target->ia_addr.sin_addr;
825 		mask = target->ia_sockmask.sin_addr;
826 		prefix.s_addr &= mask.s_addr;
827 	}
828 
829 	if ((target->ia_flags & IFA_ROUTE) == 0) {
830 		int fibnum;
831 
832 		fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
833 			target->ia_ifp->if_fib;
834 		rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
835 
836 		/*
837 		 * Removing address from !IFF_UP interface or
838 		 * prefix which exists on other interface (along with route).
839 		 * No entries should exist here except target addr.
840 		 * Given that, delete this entry only.
841 		 */
842 		in_scrubprefixlle(target, 0, flags);
843 		return (0);
844 	}
845 
846 	IN_IFADDR_RLOCK(&in_ifa_tracker);
847 	CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
848 		if (rtinitflags(ia)) {
849 			p = ia->ia_dstaddr.sin_addr;
850 
851 			if (prefix.s_addr != p.s_addr)
852 				continue;
853 		} else {
854 			p = ia->ia_addr.sin_addr;
855 			m = ia->ia_sockmask.sin_addr;
856 			p.s_addr &= m.s_addr;
857 
858 			if (prefix.s_addr != p.s_addr ||
859 			    mask.s_addr != m.s_addr)
860 				continue;
861 		}
862 
863 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
864 			continue;
865 
866 		/*
867 		 * If we got a matching prefix address, move IFA_ROUTE and
868 		 * the route itself to it.  Make sure that routing daemons
869 		 * get a heads-up.
870 		 */
871 		if ((ia->ia_flags & IFA_ROUTE) == 0) {
872 			ifa_ref(&ia->ia_ifa);
873 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
874 			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
875 			    rtinitflags(target));
876 			if (error == 0)
877 				target->ia_flags &= ~IFA_ROUTE;
878 			else
879 				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
880 					error);
881 			/* Scrub all entries IFF interface is different */
882 			in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp,
883 			    flags);
884 			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
885 			    rtinitflags(ia) | RTF_UP);
886 			if (error == 0)
887 				ia->ia_flags |= IFA_ROUTE;
888 			else
889 				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
890 					error);
891 			ifa_free(&ia->ia_ifa);
892 			return (error);
893 		}
894 	}
895 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
896 
897 	/*
898 	 * remove all L2 entries on the given prefix
899 	 */
900 	in_scrubprefixlle(target, 1, flags);
901 
902 	/*
903 	 * As no-one seem to have this prefix, we can remove the route.
904 	 */
905 	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
906 	if (error == 0)
907 		target->ia_flags &= ~IFA_ROUTE;
908 	else
909 		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
910 	return (error);
911 }
912 
913 #undef rtinitflags
914 
915 void
916 in_ifscrub_all(void)
917 {
918 	struct ifnet *ifp;
919 	struct ifaddr *ifa, *nifa;
920 	struct ifaliasreq ifr;
921 
922 	IFNET_RLOCK();
923 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
924 		/* Cannot lock here - lock recursion. */
925 		/* NET_EPOCH_ENTER(et); */
926 		CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
927 			if (ifa->ifa_addr->sa_family != AF_INET)
928 				continue;
929 
930 			/*
931 			 * This is ugly but the only way for legacy IP to
932 			 * cleanly remove addresses and everything attached.
933 			 */
934 			bzero(&ifr, sizeof(ifr));
935 			ifr.ifra_addr = *ifa->ifa_addr;
936 			if (ifa->ifa_dstaddr)
937 			ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
938 			(void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr,
939 			    ifp, NULL);
940 		}
941 		/* NET_EPOCH_EXIT(et); */
942 		in_purgemaddrs(ifp);
943 		igmp_domifdetach(ifp);
944 	}
945 	IFNET_RUNLOCK();
946 }
947 
948 int
949 in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
950 {
951 
952 	return ((in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
953 	     /*
954 	      * Check for old-style (host 0) broadcast, but
955 	      * taking into account that RFC 3021 obsoletes it.
956 	      */
957 	    (ia->ia_subnetmask != IN_RFC3021_MASK &&
958 	    ntohl(in.s_addr) == ia->ia_subnet)) &&
959 	     /*
960 	      * Check for an all one subnetmask. These
961 	      * only exist when an interface gets a secondary
962 	      * address.
963 	      */
964 	    ia->ia_subnetmask != (u_long)0xffffffff);
965 }
966 
967 /*
968  * Return 1 if the address might be a local broadcast address.
969  */
970 int
971 in_broadcast(struct in_addr in, struct ifnet *ifp)
972 {
973 	struct epoch_tracker et;
974 	struct ifaddr *ifa;
975 	int found;
976 
977 	if (in.s_addr == INADDR_BROADCAST ||
978 	    in.s_addr == INADDR_ANY)
979 		return (1);
980 	if ((ifp->if_flags & IFF_BROADCAST) == 0)
981 		return (0);
982 	found = 0;
983 	/*
984 	 * Look through the list of addresses for a match
985 	 * with a broadcast address.
986 	 */
987 	NET_EPOCH_ENTER(et);
988 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
989 		if (ifa->ifa_addr->sa_family == AF_INET &&
990 		    in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) {
991 			found = 1;
992 			break;
993 		}
994 	NET_EPOCH_EXIT(et);
995 	return (found);
996 }
997 
998 /*
999  * On interface removal, clean up IPv4 data structures hung off of the ifnet.
1000  */
1001 void
1002 in_ifdetach(struct ifnet *ifp)
1003 {
1004 	IN_MULTI_LOCK();
1005 	in_pcbpurgeif0(&V_ripcbinfo, ifp);
1006 	in_pcbpurgeif0(&V_udbinfo, ifp);
1007 	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
1008 	in_purgemaddrs(ifp);
1009 	IN_MULTI_UNLOCK();
1010 }
1011 
1012 /*
1013  * Delete all IPv4 multicast address records, and associated link-layer
1014  * multicast address records, associated with ifp.
1015  * XXX It looks like domifdetach runs AFTER the link layer cleanup.
1016  * XXX This should not race with ifma_protospec being set during
1017  * a new allocation, if it does, we have bigger problems.
1018  */
1019 static void
1020 in_purgemaddrs(struct ifnet *ifp)
1021 {
1022 	struct in_multi_head purgeinms;
1023 	struct in_multi		*inm;
1024 	struct ifmultiaddr	*ifma, *next;
1025 
1026 	SLIST_INIT(&purgeinms);
1027 	IN_MULTI_LIST_LOCK();
1028 
1029 	/*
1030 	 * Extract list of in_multi associated with the detaching ifp
1031 	 * which the PF_INET layer is about to release.
1032 	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
1033 	 * by code further down.
1034 	 */
1035 	IF_ADDR_WLOCK(ifp);
1036  restart:
1037 	CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
1038 		if (ifma->ifma_addr->sa_family != AF_INET ||
1039 		    ifma->ifma_protospec == NULL)
1040 			continue;
1041 		inm = (struct in_multi *)ifma->ifma_protospec;
1042 		inm_rele_locked(&purgeinms, inm);
1043 		if (__predict_false(ifma_restart)) {
1044 			ifma_restart = true;
1045 			goto restart;
1046 		}
1047 	}
1048 	IF_ADDR_WUNLOCK(ifp);
1049 
1050 	inm_release_list_deferred(&purgeinms);
1051 	igmp_ifdetach(ifp);
1052 	IN_MULTI_LIST_UNLOCK();
1053 }
1054 
1055 struct in_llentry {
1056 	struct llentry		base;
1057 };
1058 
/* Default number of hash buckets. */
#define	IN_LLTBL_DEFAULT_HSIZE	32
/*
 * Fold the key k down by repeated shift-and-xor and reduce it to a bucket
 * index with the mask (h) - 1, so h has to be a power of two for all
 * buckets to be reachable.  Both arguments are fully parenthesized so
 * that expression arguments are evaluated as a unit; k is expanded
 * several times and must be free of side effects.
 */
#define	IN_LLTBL_HASH(k, h) \
	((((((((k) >> 8) ^ (k)) >> 8) ^ (k)) >> 8) ^ (k)) & ((h) - 1))
1062 
1063 /*
1064  * Do actual deallocation of @lle.
1065  */
1066 static void
1067 in_lltable_destroy_lle_unlocked(epoch_context_t ctx)
1068 {
1069 	struct llentry *lle;
1070 
1071 	lle = __containerof(ctx, struct llentry, lle_epoch_ctx);
1072 	LLE_LOCK_DESTROY(lle);
1073 	LLE_REQ_DESTROY(lle);
1074 	free(lle, M_LLTABLE);
1075 }
1076 
1077 /*
1078  * Called by the datapath to indicate that
1079  * the entry was used.
1080  */
1081 static void
1082 in_lltable_mark_used(struct llentry *lle)
1083 {
1084 
1085 	LLE_REQ_LOCK(lle);
1086 	lle->r_skip_req = 0;
1087 	LLE_REQ_UNLOCK(lle);
1088 }
1089 
1090 /*
1091  * Called by LLE_FREE_LOCKED when number of references
1092  * drops to zero.
1093  */
1094 static void
1095 in_lltable_destroy_lle(struct llentry *lle)
1096 {
1097 
1098 	LLE_WUNLOCK(lle);
1099 	epoch_call(net_epoch_preempt,  &lle->lle_epoch_ctx, in_lltable_destroy_lle_unlocked);
1100 }
1101 
1102 static struct llentry *
1103 in_lltable_new(struct in_addr addr4, u_int flags)
1104 {
1105 	struct in_llentry *lle;
1106 
1107 	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
1108 	if (lle == NULL)		/* NB: caller generates msg */
1109 		return NULL;
1110 
1111 	/*
1112 	 * For IPv4 this will trigger "arpresolve" to generate
1113 	 * an ARP request.
1114 	 */
1115 	lle->base.la_expire = time_uptime; /* mark expired */
1116 	lle->base.r_l3addr.addr4 = addr4;
1117 	lle->base.lle_refcnt = 1;
1118 	lle->base.lle_free = in_lltable_destroy_lle;
1119 	LLE_LOCK_INIT(&lle->base);
1120 	LLE_REQ_INIT(&lle->base);
1121 	callout_init(&lle->base.lle_timer, 1);
1122 
1123 	return (&lle->base);
1124 }
1125 
/*
 * True when addresses d and a agree on every bit selected by mask m.
 * All three arguments are objects with an s_addr member.
 */
#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)				\
	(((d).s_addr & (m).s_addr) == ((a).s_addr & (m).s_addr))
1128 
1129 static int
1130 in_lltable_match_prefix(const struct sockaddr *saddr,
1131     const struct sockaddr *smask, u_int flags, struct llentry *lle)
1132 {
1133 	struct in_addr addr, mask, lle_addr;
1134 
1135 	addr = ((const struct sockaddr_in *)saddr)->sin_addr;
1136 	mask = ((const struct sockaddr_in *)smask)->sin_addr;
1137 	lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr);
1138 
1139 	if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0)
1140 		return (0);
1141 
1142 	if (lle->la_flags & LLE_IFADDR) {
1143 
1144 		/*
1145 		 * Delete LLE_IFADDR records IFF address & flag matches.
1146 		 * Note that addr is the interface address within prefix
1147 		 * being matched.
1148 		 * Note also we should handle 'ifdown' cases without removing
1149 		 * ifaddr macs.
1150 		 */
1151 		if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0)
1152 			return (1);
1153 		return (0);
1154 	}
1155 
1156 	/* flags & LLE_STATIC means deleting both dynamic and static entries */
1157 	if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))
1158 		return (1);
1159 
1160 	return (0);
1161 }
1162 
1163 static void
1164 in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
1165 {
1166 	size_t pkts_dropped;
1167 
1168 	LLE_WLOCK_ASSERT(lle);
1169 	KASSERT(llt != NULL, ("lltable is NULL"));
1170 
1171 	/* Unlink entry from table if not already */
1172 	if ((lle->la_flags & LLE_LINKED) != 0) {
1173 		IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
1174 		lltable_unlink_entry(llt, lle);
1175 	}
1176 
1177 	/* Drop hold queue */
1178 	pkts_dropped = llentry_free(lle);
1179 	ARPSTAT_ADD(dropped, pkts_dropped);
1180 }
1181 
1182 static int
1183 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
1184 {
1185 	struct rt_addrinfo info;
1186 	struct sockaddr_in rt_key, rt_mask;
1187 	struct sockaddr rt_gateway;
1188 	int rt_flags;
1189 
1190 	KASSERT(l3addr->sa_family == AF_INET,
1191 	    ("sin_family %d", l3addr->sa_family));
1192 
1193 	bzero(&rt_key, sizeof(rt_key));
1194 	rt_key.sin_len = sizeof(rt_key);
1195 	bzero(&rt_mask, sizeof(rt_mask));
1196 	rt_mask.sin_len = sizeof(rt_mask);
1197 	bzero(&rt_gateway, sizeof(rt_gateway));
1198 	rt_gateway.sa_len = sizeof(rt_gateway);
1199 
1200 	bzero(&info, sizeof(info));
1201 	info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
1202 	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&rt_mask;
1203 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
1204 
1205 	if (rib_lookup_info(ifp->if_fib, l3addr, NHR_REF, 0, &info) != 0)
1206 		return (EINVAL);
1207 
1208 	rt_flags = info.rti_flags;
1209 
1210 	/*
1211 	 * If the gateway for an existing host route matches the target L3
1212 	 * address, which is a special route inserted by some implementation
1213 	 * such as MANET, and the interface is of the correct type, then
1214 	 * allow for ARP to proceed.
1215 	 */
1216 	if (rt_flags & RTF_GATEWAY) {
1217 		if (!(rt_flags & RTF_HOST) || !info.rti_ifp ||
1218 		    info.rti_ifp->if_type != IFT_ETHER ||
1219 		    (info.rti_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
1220 		    memcmp(rt_gateway.sa_data, l3addr->sa_data,
1221 		    sizeof(in_addr_t)) != 0) {
1222 			rib_free_info(&info);
1223 			return (EINVAL);
1224 		}
1225 	}
1226 	rib_free_info(&info);
1227 
1228 	/*
1229 	 * Make sure that at least the destination address is covered
1230 	 * by the route. This is for handling the case where 2 or more
1231 	 * interfaces have the same prefix. An incoming packet arrives
1232 	 * on one interface and the corresponding outgoing packet leaves
1233 	 * another interface.
1234 	 */
1235 	if (!(rt_flags & RTF_HOST) && info.rti_ifp != ifp) {
1236 		const char *sa, *mask, *addr, *lim;
1237 		const struct sockaddr_in *l3sin;
1238 
1239 		mask = (const char *)&rt_mask;
1240 		/*
1241 		 * Just being extra cautious to avoid some custom
1242 		 * code getting into trouble.
1243 		 */
1244 		if ((info.rti_addrs & RTA_NETMASK) == 0)
1245 			return (EINVAL);
1246 
1247 		sa = (const char *)&rt_key;
1248 		addr = (const char *)l3addr;
1249 		l3sin = (const struct sockaddr_in *)l3addr;
1250 		lim = addr + l3sin->sin_len;
1251 
1252 		for ( ; addr < lim; sa++, mask++, addr++) {
1253 			if ((*sa ^ *addr) & *mask) {
1254 #ifdef DIAGNOSTIC
1255 				char addrbuf[INET_ADDRSTRLEN];
1256 
1257 				log(LOG_INFO, "IPv4 address: \"%s\" "
1258 				    "is not on the network\n",
1259 				    inet_ntoa_r(l3sin->sin_addr, addrbuf));
1260 #endif
1261 				return (EINVAL);
1262 			}
1263 		}
1264 	}
1265 
1266 	return (0);
1267 }
1268 
1269 static inline uint32_t
1270 in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize)
1271 {
1272 
1273 	return (IN_LLTBL_HASH(dst.s_addr, hsize));
1274 }
1275 
1276 static uint32_t
1277 in_lltable_hash(const struct llentry *lle, uint32_t hsize)
1278 {
1279 
1280 	return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize));
1281 }
1282 
1283 static void
1284 in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
1285 {
1286 	struct sockaddr_in *sin;
1287 
1288 	sin = (struct sockaddr_in *)sa;
1289 	bzero(sin, sizeof(*sin));
1290 	sin->sin_family = AF_INET;
1291 	sin->sin_len = sizeof(*sin);
1292 	sin->sin_addr = lle->r_l3addr.addr4;
1293 }
1294 
1295 static inline struct llentry *
1296 in_lltable_find_dst(struct lltable *llt, struct in_addr dst)
1297 {
1298 	struct llentry *lle;
1299 	struct llentries *lleh;
1300 	u_int hashidx;
1301 
1302 	hashidx = in_lltable_hash_dst(dst, llt->llt_hsize);
1303 	lleh = &llt->lle_head[hashidx];
1304 	CK_LIST_FOREACH(lle, lleh, lle_next) {
1305 		if (lle->la_flags & LLE_DELETED)
1306 			continue;
1307 		if (lle->r_l3addr.addr4.s_addr == dst.s_addr)
1308 			break;
1309 	}
1310 
1311 	return (lle);
1312 }
1313 
1314 static void
1315 in_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
1316 {
1317 
1318 	lle->la_flags |= LLE_DELETED;
1319 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
1320 #ifdef DIAGNOSTIC
1321 	log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
1322 #endif
1323 	llentry_free(lle);
1324 }
1325 
1326 static struct llentry *
1327 in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
1328 {
1329 	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
1330 	struct ifnet *ifp = llt->llt_ifp;
1331 	struct llentry *lle;
1332 	char linkhdr[LLE_MAX_LINKHDR];
1333 	size_t linkhdrsize;
1334 	int lladdr_off;
1335 
1336 	KASSERT(l3addr->sa_family == AF_INET,
1337 	    ("sin_family %d", l3addr->sa_family));
1338 
1339 	/*
1340 	 * A route that covers the given address must have
1341 	 * been installed 1st because we are doing a resolution,
1342 	 * verify this.
1343 	 */
1344 	if (!(flags & LLE_IFADDR) &&
1345 	    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
1346 		return (NULL);
1347 
1348 	lle = in_lltable_new(sin->sin_addr, flags);
1349 	if (lle == NULL) {
1350 		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
1351 		return (NULL);
1352 	}
1353 	lle->la_flags = flags;
1354 	if (flags & LLE_STATIC)
1355 		lle->r_flags |= RLLE_VALID;
1356 	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
1357 		linkhdrsize = LLE_MAX_LINKHDR;
1358 		if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
1359 		    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
1360 			epoch_call(net_epoch_preempt,  &lle->lle_epoch_ctx, in_lltable_destroy_lle_unlocked);
1361 			return (NULL);
1362 		}
1363 		lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
1364 		    lladdr_off);
1365 		lle->la_flags |= LLE_STATIC;
1366 		lle->r_flags |= (RLLE_VALID | RLLE_IFADDR);
1367 	}
1368 
1369 	return (lle);
1370 }
1371 
1372 /*
1373  * Return NULL if not found or marked for deletion.
1374  * If found return lle read locked.
1375  */
1376 static struct llentry *
1377 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
1378 {
1379 	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
1380 	struct llentry *lle;
1381 
1382 	IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
1383 	KASSERT(l3addr->sa_family == AF_INET,
1384 	    ("sin_family %d", l3addr->sa_family));
1385 	KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) !=
1386 	    (LLE_UNLOCKED | LLE_EXCLUSIVE),
1387 	    ("wrong lle request flags: %#x", flags));
1388 
1389 	lle = in_lltable_find_dst(llt, sin->sin_addr);
1390 	if (lle == NULL)
1391 		return (NULL);
1392 	if (flags & LLE_UNLOCKED)
1393 		return (lle);
1394 
1395 	if (flags & LLE_EXCLUSIVE)
1396 		LLE_WLOCK(lle);
1397 	else
1398 		LLE_RLOCK(lle);
1399 
1400 	/*
1401 	 * If the afdata lock is not held, the LLE may have been unlinked while
1402 	 * we were blocked on the LLE lock.  Check for this case.
1403 	 */
1404 	if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) {
1405 		if (flags & LLE_EXCLUSIVE)
1406 			LLE_WUNLOCK(lle);
1407 		else
1408 			LLE_RUNLOCK(lle);
1409 		return (NULL);
1410 	}
1411 	return (lle);
1412 }
1413 
1414 static int
1415 in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
1416     struct sysctl_req *wr)
1417 {
1418 	struct ifnet *ifp = llt->llt_ifp;
1419 	/* XXX stack use */
1420 	struct {
1421 		struct rt_msghdr	rtm;
1422 		struct sockaddr_in	sin;
1423 		struct sockaddr_dl	sdl;
1424 	} arpc;
1425 	struct sockaddr_dl *sdl;
1426 	int error;
1427 
1428 	bzero(&arpc, sizeof(arpc));
1429 	/* skip deleted entries */
1430 	if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
1431 		return (0);
1432 	/* Skip if jailed and not a valid IP of the prison. */
1433 	lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin);
1434 	if (prison_if(wr->td->td_ucred, (struct sockaddr *)&arpc.sin) != 0)
1435 		return (0);
1436 	/*
1437 	 * produce a msg made of:
1438 	 *  struct rt_msghdr;
1439 	 *  struct sockaddr_in; (IPv4)
1440 	 *  struct sockaddr_dl;
1441 	 */
1442 	arpc.rtm.rtm_msglen = sizeof(arpc);
1443 	arpc.rtm.rtm_version = RTM_VERSION;
1444 	arpc.rtm.rtm_type = RTM_GET;
1445 	arpc.rtm.rtm_flags = RTF_UP;
1446 	arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
1447 
1448 	/* publish */
1449 	if (lle->la_flags & LLE_PUB)
1450 		arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
1451 
1452 	sdl = &arpc.sdl;
1453 	sdl->sdl_family = AF_LINK;
1454 	sdl->sdl_len = sizeof(*sdl);
1455 	sdl->sdl_index = ifp->if_index;
1456 	sdl->sdl_type = ifp->if_type;
1457 	if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
1458 		sdl->sdl_alen = ifp->if_addrlen;
1459 		bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
1460 	} else {
1461 		sdl->sdl_alen = 0;
1462 		bzero(LLADDR(sdl), ifp->if_addrlen);
1463 	}
1464 
1465 	arpc.rtm.rtm_rmx.rmx_expire =
1466 	    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
1467 	arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
1468 	if (lle->la_flags & LLE_STATIC)
1469 		arpc.rtm.rtm_flags |= RTF_STATIC;
1470 	if (lle->la_flags & LLE_IFADDR)
1471 		arpc.rtm.rtm_flags |= RTF_PINNED;
1472 	arpc.rtm.rtm_index = ifp->if_index;
1473 	error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
1474 
1475 	return (error);
1476 }
1477 
1478 static struct lltable *
1479 in_lltattach(struct ifnet *ifp)
1480 {
1481 	struct lltable *llt;
1482 
1483 	llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE);
1484  	llt->llt_af = AF_INET;
1485  	llt->llt_ifp = ifp;
1486 
1487 	llt->llt_lookup = in_lltable_lookup;
1488 	llt->llt_alloc_entry = in_lltable_alloc;
1489 	llt->llt_delete_entry = in_lltable_delete_entry;
1490 	llt->llt_dump_entry = in_lltable_dump_entry;
1491 	llt->llt_hash = in_lltable_hash;
1492 	llt->llt_fill_sa_entry = in_lltable_fill_sa_entry;
1493 	llt->llt_free_entry = in_lltable_free_entry;
1494 	llt->llt_match_prefix = in_lltable_match_prefix;
1495 	llt->llt_mark_used = in_lltable_mark_used;
1496  	lltable_link(llt);
1497 
1498 	return (llt);
1499 }
1500 
1501 void *
1502 in_domifattach(struct ifnet *ifp)
1503 {
1504 	struct in_ifinfo *ii;
1505 
1506 	ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
1507 
1508 	ii->ii_llt = in_lltattach(ifp);
1509 	ii->ii_igmp = igmp_domifattach(ifp);
1510 
1511 	return (ii);
1512 }
1513 
1514 void
1515 in_domifdetach(struct ifnet *ifp, void *aux)
1516 {
1517 	struct in_ifinfo *ii = (struct in_ifinfo *)aux;
1518 
1519 	igmp_domifdetach(ifp);
1520 	lltable_free(ii->ii_llt);
1521 	free(ii, M_IFADDR);
1522 }
1523