xref: /dragonfly/sys/net/if.c (revision 6b5c5d0d)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  * $DragonFly: src/sys/net/if.c,v 1.60 2008/01/11 11:59:40 sephe Exp $
36  */
37 
38 #include "opt_compat.h"
39 #include "opt_inet6.h"
40 #include "opt_inet.h"
41 #include "opt_polling.h"
42 
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/socketops.h>
52 #include <sys/protosw.h>
53 #include <sys/kernel.h>
54 #include <sys/sockio.h>
55 #include <sys/syslog.h>
56 #include <sys/sysctl.h>
57 #include <sys/domain.h>
58 #include <sys/thread.h>
59 #include <sys/thread2.h>
60 #include <sys/serialize.h>
61 
62 #include <net/if.h>
63 #include <net/if_arp.h>
64 #include <net/if_dl.h>
65 #include <net/if_types.h>
66 #include <net/if_var.h>
67 #include <net/ifq_var.h>
68 #include <net/radix.h>
69 #include <net/route.h>
70 #include <net/if_clone.h>
71 #include <machine/stdarg.h>
72 
73 #if defined(INET) || defined(INET6)
74 /*XXX*/
75 #include <netinet/in.h>
76 #include <netinet/in_var.h>
77 #include <netinet/if_ether.h>
78 #ifdef INET6
79 #include <netinet6/in6_var.h>
80 #include <netinet6/in6_ifattach.h>
81 #endif
82 #endif
83 
84 #if defined(COMPAT_43)
85 #include <emulation/43bsd/43bsd_socket.h>
86 #endif /* COMPAT_43 */
87 
88 /*
89  * Support for non-ALTQ interfaces.
90  */
/*
 * Forward declarations of the classic send-queue handlers.  They are
 * static, so their definitions live elsewhere in this translation unit.
 */
static int	ifq_classic_enqueue(struct ifaltq *, struct mbuf *,
				    struct altq_pktattr *);
static struct mbuf *
		ifq_classic_dequeue(struct ifaltq *, struct mbuf *, int);
static int	ifq_classic_request(struct ifaltq *, int, void *);

/*
 * System initialization and other file-local helpers:
 *
 *   if_attachdomain   SYSINIT hook (domainifattach); runs
 *                     if_attachdomain1() on every listed interface
 *   if_attachdomain1  per-interface domain attach step
 *   ifconf            called by ifioctl() for SIOCGIFCONF/OSIOCGIFCONF
 *   ifinit            SYSINIT hook (interfaces)
 *   if_slowtimo       handler for the if_slowtimo_timer callout
 *   link_rtrequest    installed as ifa_rtrequest on the link-level ifaddr
 *   if_rtdel          rnh_walktree() callback used by if_detach()
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);
107 
108 #ifdef INET6
109 /*
110  * XXX: declare here to avoid to include many inet6 related files..
111  * should be more generalized?
112  */
113 extern void	nd6_setmtu(struct ifnet *);
114 #endif
115 
/* sysctl nodes: the "link" node under _net and "generic" beneath it. */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Register ifinit() to run at SI_SUB_PROTO_IF. */
SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)

/* Malloc types for interface addresses and link-level multicast addresses. */
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");

/*
 * ifqmaxlen: send-queue length ifinit() applies to interfaces whose
 *            driver left if_snd.ifq_maxlen at 0.
 * ifnet:     list of all attached interfaces, linked through if_link.
 */
int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

/* Callout that periodically re-runs if_slowtimo(). */
struct callout		if_slowtimo_timer;

/*
 * if_index:      highest interface index currently in use; incremented by
 *                if_attach() and trimmed by if_detach().
 * ifindex2ifnet: table mapping an interface index to its ifnet; allocated
 *                and grown by if_attach().  if_detach() sets a slot to
 *                NULL, so entries at or below if_index may be NULL.
 */
int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
131 
132 /*
133  * Network interface utility routines.
134  *
135  * Routines with ifa_ifwith* names take sockaddr *'s as
136  * parameters.
137  */
138 /* ARGSUSED*/
139 void
140 ifinit(void *dummy)
141 {
142 	struct ifnet *ifp;
143 
144 	callout_init(&if_slowtimo_timer);
145 
146 	crit_enter();
147 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
148 		if (ifp->if_snd.ifq_maxlen == 0) {
149 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
150 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
151 		}
152 	}
153 	crit_exit();
154 
155 	if_slowtimo(0);
156 }
157 
158 /*
159  * Attach an interface to the list of "active" interfaces.
160  *
161  * The serializer is optional.  If non-NULL access to the interface
162  * may be MPSAFE.
163  */
164 void
165 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
166 {
167 	unsigned socksize, ifasize;
168 	int namelen, masklen;
169 	struct sockaddr_dl *sdl;
170 	struct ifaddr *ifa;
171 	struct ifaltq *ifq;
172 
173 	static int if_indexlim = 8;
174 
175 	/*
176 	 * The serializer can be passed in from the device, allowing the
177 	 * same serializer to be used for both the interrupt interlock and
178 	 * the device queue.  If not specified, the netif structure will
179 	 * use an embedded serializer.
180 	 */
181 	if (serializer == NULL) {
182 		serializer = &ifp->if_default_serializer;
183 		lwkt_serialize_init(serializer);
184 	}
185 	ifp->if_serializer = serializer;
186 
187 #ifdef DEVICE_POLLING
188 	/* Device is not in polling mode by default */
189 	ifp->if_poll_cpuid = -1;
190 #endif
191 
192 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
193 	ifp->if_index = ++if_index;
194 	/*
195 	 * XXX -
196 	 * The old code would work if the interface passed a pre-existing
197 	 * chain of ifaddrs to this code.  We don't trust our callers to
198 	 * properly initialize the tailq, however, so we no longer allow
199 	 * this unlikely case.
200 	 */
201 	TAILQ_INIT(&ifp->if_addrhead);
202 	TAILQ_INIT(&ifp->if_prefixhead);
203 	LIST_INIT(&ifp->if_multiaddrs);
204 	getmicrotime(&ifp->if_lastchange);
205 	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
206 		unsigned int n;
207 		struct ifnet **q;
208 
209 		if_indexlim <<= 1;
210 
211 		/* grow ifindex2ifnet */
212 		n = if_indexlim * sizeof(*q);
213 		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
214 		if (ifindex2ifnet) {
215 			bcopy(ifindex2ifnet, q, n/2);
216 			kfree(ifindex2ifnet, M_IFADDR);
217 		}
218 		ifindex2ifnet = q;
219 	}
220 
221 	ifindex2ifnet[if_index] = ifp;
222 
223 	/*
224 	 * create a Link Level name for this device
225 	 */
226 	namelen = strlen(ifp->if_xname);
227 #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
228 	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
229 	socksize = masklen + ifp->if_addrlen;
230 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
231 	if (socksize < sizeof(*sdl))
232 		socksize = sizeof(*sdl);
233 	socksize = ROUNDUP(socksize);
234 	ifasize = sizeof(struct ifaddr) + 2 * socksize;
235 	ifa = kmalloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
236 	sdl = (struct sockaddr_dl *)(ifa + 1);
237 	sdl->sdl_len = socksize;
238 	sdl->sdl_family = AF_LINK;
239 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
240 	sdl->sdl_nlen = namelen;
241 	sdl->sdl_index = ifp->if_index;
242 	sdl->sdl_type = ifp->if_type;
243 	ifp->if_lladdr = ifa;
244 	ifa->ifa_ifp = ifp;
245 	ifa->ifa_rtrequest = link_rtrequest;
246 	ifa->ifa_addr = (struct sockaddr *)sdl;
247 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
248 	ifa->ifa_netmask = (struct sockaddr *)sdl;
249 	sdl->sdl_len = masklen;
250 	while (namelen != 0)
251 		sdl->sdl_data[--namelen] = 0xff;
252 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
253 
254 	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
255 
256 	ifq = &ifp->if_snd;
257 	ifq->altq_type = 0;
258 	ifq->altq_disc = NULL;
259 	ifq->altq_flags &= ALTQF_CANTCHANGE;
260 	ifq->altq_tbr = NULL;
261 	ifq->altq_ifp = ifp;
262 	ifq_set_classic(ifq);
263 
264 	if (!SLIST_EMPTY(&domains))
265 		if_attachdomain1(ifp);
266 
267 	/* Announce the interface. */
268 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
269 }
270 
271 static void
272 if_attachdomain(void *dummy)
273 {
274 	struct ifnet *ifp;
275 
276 	crit_enter();
277 	TAILQ_FOREACH(ifp, &ifnet, if_list)
278 		if_attachdomain1(ifp);
279 	crit_exit();
280 }
281 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
282 	if_attachdomain, NULL);
283 
284 static void
285 if_attachdomain1(struct ifnet *ifp)
286 {
287 	struct domain *dp;
288 
289 	crit_enter();
290 
291 	/* address family dependent data region */
292 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
293 	SLIST_FOREACH(dp, &domains, dom_next)
294 		if (dp->dom_ifattach)
295 			ifp->if_afdata[dp->dom_family] =
296 				(*dp->dom_ifattach)(ifp);
297 	crit_exit();
298 }
299 
300 /*
301  * Purge all addresses whose type is _not_ AF_LINK
302  */
303 void
304 if_purgeaddrs_nolink(struct ifnet *ifp)
305 {
306 	struct ifaddr *ifa, *next;
307 
308 	TAILQ_FOREACH_MUTABLE(ifa, &ifp->if_addrhead, ifa_link, next) {
309 		/* Leave link ifaddr as it is */
310 		if (ifa->ifa_addr->sa_family == AF_LINK)
311 			continue;
312 #ifdef INET
313 		/* XXX: Ugly!! ad hoc just for INET */
314 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
315 			struct ifaliasreq ifr;
316 
317 			bzero(&ifr, sizeof ifr);
318 			ifr.ifra_addr = *ifa->ifa_addr;
319 			if (ifa->ifa_dstaddr)
320 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
321 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
322 				       NULL) == 0)
323 				continue;
324 		}
325 #endif /* INET */
326 #ifdef INET6
327 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
328 			in6_purgeaddr(ifa);
329 			/* ifp_addrhead is already updated */
330 			continue;
331 		}
332 #endif /* INET6 */
333 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
334 		IFAFREE(ifa);
335 	}
336 }
337 
338 /*
339  * Detach an interface, removing it from the
340  * list of "active" interfaces.
341  */
342 void
343 if_detach(struct ifnet *ifp)
344 {
345 	struct radix_node_head	*rnh;
346 	int i;
347 	int cpu, origcpu;
348 	struct domain *dp;
349 
350 	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
351 
352 	/*
353 	 * Remove routes and flush queues.
354 	 */
355 	crit_enter();
356 #ifdef DEVICE_POLLING
357 	if (ifp->if_flags & IFF_POLLING)
358 		ether_poll_deregister(ifp);
359 #endif
360 	if_down(ifp);
361 
362 	if (ifq_is_enabled(&ifp->if_snd))
363 		altq_disable(&ifp->if_snd);
364 	if (ifq_is_attached(&ifp->if_snd))
365 		altq_detach(&ifp->if_snd);
366 
367 	/*
368 	 * Clean up all addresses.
369 	 */
370 	ifp->if_lladdr = NULL;
371 
372 	if_purgeaddrs_nolink(ifp);
373 	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
374 		struct ifaddr *ifa;
375 
376 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
377 		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
378 			("non-link ifaddr is left on if_addrhead"));
379 
380 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
381 		IFAFREE(ifa);
382 		KASSERT(TAILQ_EMPTY(&ifp->if_addrhead),
383 			("there are still ifaddrs left on if_addrhead"));
384 	}
385 
386 #ifdef INET
387 	/*
388 	 * Remove all IPv4 kernel structures related to ifp.
389 	 */
390 	in_ifdetach(ifp);
391 #endif
392 
393 #ifdef INET6
394 	/*
395 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
396 	 * before removing routing entries below, since IPv6 interface direct
397 	 * routes are expected to be removed by the IPv6-specific kernel API.
398 	 * Otherwise, the kernel will detect some inconsistency and bark it.
399 	 */
400 	in6_ifdetach(ifp);
401 #endif
402 
403 	/*
404 	 * Delete all remaining routes using this interface
405 	 * Unfortuneatly the only way to do this is to slog through
406 	 * the entire routing table looking for routes which point
407 	 * to this interface...oh well...
408 	 */
409 	origcpu = mycpuid;
410 	for (cpu = 0; cpu < ncpus2; cpu++) {
411 		lwkt_migratecpu(cpu);
412 		for (i = 1; i <= AF_MAX; i++) {
413 			if ((rnh = rt_tables[mycpuid][i]) == NULL)
414 				continue;
415 			rnh->rnh_walktree(rnh, if_rtdel, ifp);
416 		}
417 	}
418 	lwkt_migratecpu(origcpu);
419 
420 	/* Announce that the interface is gone. */
421 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
422 
423 	SLIST_FOREACH(dp, &domains, dom_next)
424 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
425 			(*dp->dom_ifdetach)(ifp,
426 				ifp->if_afdata[dp->dom_family]);
427 
428 	/*
429 	 * Remove interface from ifindex2ifp[] and maybe decrement if_index.
430 	 */
431 	ifindex2ifnet[ifp->if_index] = NULL;
432 	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
433 		if_index--;
434 
435 	TAILQ_REMOVE(&ifnet, ifp, if_link);
436 	crit_exit();
437 }
438 
439 /*
440  * Delete Routes for a Network Interface
441  *
442  * Called for each routing entry via the rnh->rnh_walktree() call above
443  * to delete all route entries referencing a detaching network interface.
444  *
445  * Arguments:
446  *	rn	pointer to node in the routing table
447  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
448  *
449  * Returns:
450  *	0	successful
451  *	errno	failed - reason indicated
452  *
453  */
454 static int
455 if_rtdel(struct radix_node *rn, void *arg)
456 {
457 	struct rtentry	*rt = (struct rtentry *)rn;
458 	struct ifnet	*ifp = arg;
459 	int		err;
460 
461 	if (rt->rt_ifp == ifp) {
462 
463 		/*
464 		 * Protect (sorta) against walktree recursion problems
465 		 * with cloned routes
466 		 */
467 		if (!(rt->rt_flags & RTF_UP))
468 			return (0);
469 
470 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
471 				rt_mask(rt), rt->rt_flags,
472 				(struct rtentry **) NULL);
473 		if (err) {
474 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
475 		}
476 	}
477 
478 	return (0);
479 }
480 
481 /*
482  * Locate an interface based on a complete address.
483  */
484 struct ifaddr *
485 ifa_ifwithaddr(struct sockaddr *addr)
486 {
487 	struct ifnet *ifp;
488 	struct ifaddr *ifa;
489 
490 	TAILQ_FOREACH(ifp, &ifnet, if_link)
491 	    TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
492 		if (ifa->ifa_addr->sa_family != addr->sa_family)
493 			continue;
494 		if (sa_equal(addr, ifa->ifa_addr))
495 			return (ifa);
496 		if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr &&
497 		    /* IPv6 doesn't have broadcast */
498 		    ifa->ifa_broadaddr->sa_len != 0 &&
499 		    sa_equal(ifa->ifa_broadaddr, addr))
500 			return (ifa);
501 	}
502 	return ((struct ifaddr *)NULL);
503 }
504 /*
505  * Locate the point to point interface with a given destination address.
506  */
507 struct ifaddr *
508 ifa_ifwithdstaddr(struct sockaddr *addr)
509 {
510 	struct ifnet *ifp;
511 	struct ifaddr *ifa;
512 
513 	TAILQ_FOREACH(ifp, &ifnet, if_link)
514 	    if (ifp->if_flags & IFF_POINTOPOINT)
515 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
516 			if (ifa->ifa_addr->sa_family != addr->sa_family)
517 				continue;
518 			if (ifa->ifa_dstaddr &&
519 			    sa_equal(addr, ifa->ifa_dstaddr))
520 				return (ifa);
521 	}
522 	return ((struct ifaddr *)NULL);
523 }
524 
525 /*
526  * Find an interface on a specific network.  If many, choice
527  * is most specific found.
528  */
529 struct ifaddr *
530 ifa_ifwithnet(struct sockaddr *addr)
531 {
532 	struct ifnet *ifp;
533 	struct ifaddr *ifa;
534 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
535 	u_int af = addr->sa_family;
536 	char *addr_data = addr->sa_data, *cplim;
537 
538 	/*
539 	 * AF_LINK addresses can be looked up directly by their index number,
540 	 * so do that if we can.
541 	 */
542 	if (af == AF_LINK) {
543 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
544 
545 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
546 		return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
547 	}
548 
549 	/*
550 	 * Scan though each interface, looking for ones that have
551 	 * addresses in this address family.
552 	 */
553 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
554 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
555 			char *cp, *cp2, *cp3;
556 
557 			if (ifa->ifa_addr->sa_family != af)
558 next:				continue;
559 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
560 				/*
561 				 * This is a bit broken as it doesn't
562 				 * take into account that the remote end may
563 				 * be a single node in the network we are
564 				 * looking for.
565 				 * The trouble is that we don't know the
566 				 * netmask for the remote end.
567 				 */
568 				if (ifa->ifa_dstaddr != NULL &&
569 				    sa_equal(addr, ifa->ifa_dstaddr))
570 					return (ifa);
571 			} else {
572 				/*
573 				 * if we have a special address handler,
574 				 * then use it instead of the generic one.
575 				 */
576 				if (ifa->ifa_claim_addr) {
577 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
578 						return (ifa);
579 					} else {
580 						continue;
581 					}
582 				}
583 
584 				/*
585 				 * Scan all the bits in the ifa's address.
586 				 * If a bit dissagrees with what we are
587 				 * looking for, mask it with the netmask
588 				 * to see if it really matters.
589 				 * (A byte at a time)
590 				 */
591 				if (ifa->ifa_netmask == 0)
592 					continue;
593 				cp = addr_data;
594 				cp2 = ifa->ifa_addr->sa_data;
595 				cp3 = ifa->ifa_netmask->sa_data;
596 				cplim = ifa->ifa_netmask->sa_len +
597 					(char *)ifa->ifa_netmask;
598 				while (cp3 < cplim)
599 					if ((*cp++ ^ *cp2++) & *cp3++)
600 						goto next; /* next address! */
601 				/*
602 				 * If the netmask of what we just found
603 				 * is more specific than what we had before
604 				 * (if we had one) then remember the new one
605 				 * before continuing to search
606 				 * for an even better one.
607 				 */
608 				if (ifa_maybe == 0 ||
609 				    rn_refines((char *)ifa->ifa_netmask,
610 					       (char *)ifa_maybe->ifa_netmask))
611 					ifa_maybe = ifa;
612 			}
613 		}
614 	}
615 	return (ifa_maybe);
616 }
617 
618 /*
619  * Find an interface address specific to an interface best matching
620  * a given address.
621  */
622 struct ifaddr *
623 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
624 {
625 	struct ifaddr *ifa;
626 	char *cp, *cp2, *cp3;
627 	char *cplim;
628 	struct ifaddr *ifa_maybe = 0;
629 	u_int af = addr->sa_family;
630 
631 	if (af >= AF_MAX)
632 		return (0);
633 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
634 		if (ifa->ifa_addr->sa_family != af)
635 			continue;
636 		if (ifa_maybe == 0)
637 			ifa_maybe = ifa;
638 		if (ifa->ifa_netmask == NULL) {
639 			if (sa_equal(addr, ifa->ifa_addr) ||
640 			    (ifa->ifa_dstaddr != NULL &&
641 			     sa_equal(addr, ifa->ifa_dstaddr)))
642 				return (ifa);
643 			continue;
644 		}
645 		if (ifp->if_flags & IFF_POINTOPOINT) {
646 			if (sa_equal(addr, ifa->ifa_dstaddr))
647 				return (ifa);
648 		} else {
649 			cp = addr->sa_data;
650 			cp2 = ifa->ifa_addr->sa_data;
651 			cp3 = ifa->ifa_netmask->sa_data;
652 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
653 			for (; cp3 < cplim; cp3++)
654 				if ((*cp++ ^ *cp2++) & *cp3)
655 					break;
656 			if (cp3 == cplim)
657 				return (ifa);
658 		}
659 	}
660 	return (ifa_maybe);
661 }
662 
663 /*
664  * Default action when installing a route with a Link Level gateway.
665  * Lookup an appropriate real ifa to point to.
666  * This should be moved to /sys/net/link.c eventually.
667  */
668 static void
669 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
670 {
671 	struct ifaddr *ifa;
672 	struct sockaddr *dst;
673 	struct ifnet *ifp;
674 
675 	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
676 	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
677 		return;
678 	ifa = ifaof_ifpforaddr(dst, ifp);
679 	if (ifa != NULL) {
680 		IFAFREE(rt->rt_ifa);
681 		IFAREF(ifa);
682 		rt->rt_ifa = ifa;
683 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
684 			ifa->ifa_rtrequest(cmd, rt, info);
685 	}
686 }
687 
688 /*
689  * Mark an interface down and notify protocols of
690  * the transition.
691  * NOTE: must be called at splnet or eqivalent.
692  */
693 void
694 if_unroute(struct ifnet *ifp, int flag, int fam)
695 {
696 	struct ifaddr *ifa;
697 
698 	ifp->if_flags &= ~flag;
699 	getmicrotime(&ifp->if_lastchange);
700 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
701 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
702 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
703 	ifq_purge(&ifp->if_snd);
704 	rt_ifmsg(ifp);
705 }
706 
707 /*
708  * Mark an interface up and notify protocols of
709  * the transition.
710  * NOTE: must be called at splnet or eqivalent.
711  */
712 void
713 if_route(struct ifnet *ifp, int flag, int fam)
714 {
715 	struct ifaddr *ifa;
716 
717 	ifp->if_flags |= flag;
718 	getmicrotime(&ifp->if_lastchange);
719 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
720 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
721 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
722 	rt_ifmsg(ifp);
723 #ifdef INET6
724 	in6_if_up(ifp);
725 #endif
726 }
727 
728 /*
729  * Mark an interface down and notify protocols of the transition.  An
730  * interface going down is also considered to be a synchronizing event.
731  * We must ensure that all packet processing related to the interface
732  * has completed before we return so e.g. the caller can free the ifnet
733  * structure that the mbufs may be referencing.
734  *
735  * NOTE: must be called at splnet or eqivalent.
736  */
737 void
738 if_down(struct ifnet *ifp)
739 {
740 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
741 	netmsg_service_sync();
742 }
743 
744 /*
745  * Mark an interface up and notify protocols of
746  * the transition.
747  * NOTE: must be called at splnet or eqivalent.
748  */
749 void
750 if_up(struct ifnet *ifp)
751 {
752 
753 	if_route(ifp, IFF_UP, AF_UNSPEC);
754 }
755 
/*
 * Process a link state change.  Currently this only calls rt_ifmsg()
 * for the interface.
 *
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	rt_ifmsg(ifp);
}
765 
766 /*
767  * Handle interface watchdog timer routines.  Called
768  * from softclock, we decrement timers (if set) and
769  * call the appropriate interface routine on expiration.
770  */
771 static void
772 if_slowtimo(void *arg)
773 {
774 	struct ifnet *ifp;
775 
776 	crit_enter();
777 
778 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
779 		if (ifp->if_timer == 0 || --ifp->if_timer)
780 			continue;
781 		if (ifp->if_watchdog) {
782 			if (lwkt_serialize_try(ifp->if_serializer)) {
783 				(*ifp->if_watchdog)(ifp);
784 				lwkt_serialize_exit(ifp->if_serializer);
785 			} else {
786 				/* try again next timeout */
787 				++ifp->if_timer;
788 			}
789 		}
790 	}
791 
792 	crit_exit();
793 
794 	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
795 }
796 
797 /*
798  * Map interface name to
799  * interface structure pointer.
800  */
801 struct ifnet *
802 ifunit(const char *name)
803 {
804 	struct ifnet *ifp;
805 
806 	/*
807 	 * Search all the interfaces for this name/number
808 	 */
809 
810 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
811 		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
812 			break;
813 	}
814 	return (ifp);
815 }
816 
817 
818 /*
819  * Map interface name in a sockaddr_dl to
820  * interface structure pointer.
821  */
822 struct ifnet *
823 if_withname(struct sockaddr *sa)
824 {
825 	char ifname[IFNAMSIZ+1];
826 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
827 
828 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
829 	     (sdl->sdl_nlen > IFNAMSIZ) )
830 		return NULL;
831 
832 	/*
833 	 * ifunit wants a null-terminated name.  It may not be null-terminated
834 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
835 	 * and there might not be room to put the trailing null anyway, so we
836 	 * make a local copy that we know we can null terminate safely.
837 	 */
838 
839 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
840 	ifname[sdl->sdl_nlen] = '\0';
841 	return ifunit(ifname);
842 }
843 
844 
845 /*
846  * Interface ioctls.
847  */
848 int
849 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
850 {
851 	struct ifnet *ifp;
852 	struct ifreq *ifr;
853 	struct ifstat *ifs;
854 	int error;
855 	short oif_flags;
856 	int new_flags;
857 	size_t namelen, onamelen;
858 	char new_name[IFNAMSIZ];
859 	struct ifaddr *ifa;
860 	struct sockaddr_dl *sdl;
861 
862 	switch (cmd) {
863 
864 	case SIOCGIFCONF:
865 	case OSIOCGIFCONF:
866 		return (ifconf(cmd, data, cred));
867 	}
868 	ifr = (struct ifreq *)data;
869 
870 	switch (cmd) {
871 	case SIOCIFCREATE:
872 	case SIOCIFDESTROY:
873 		if ((error = suser_cred(cred, 0)) != 0)
874 			return (error);
875 		return ((cmd == SIOCIFCREATE) ?
876 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
877 			if_clone_destroy(ifr->ifr_name));
878 
879 	case SIOCIFGCLONERS:
880 		return (if_clone_list((struct if_clonereq *)data));
881 	}
882 
883 	ifp = ifunit(ifr->ifr_name);
884 	if (ifp == 0)
885 		return (ENXIO);
886 	switch (cmd) {
887 
888 	case SIOCGIFFLAGS:
889 		ifr->ifr_flags = ifp->if_flags;
890 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
891 		break;
892 
893 	case SIOCGIFCAP:
894 		ifr->ifr_reqcap = ifp->if_capabilities;
895 		ifr->ifr_curcap = ifp->if_capenable;
896 		break;
897 
898 	case SIOCGIFMETRIC:
899 		ifr->ifr_metric = ifp->if_metric;
900 		break;
901 
902 	case SIOCGIFMTU:
903 		ifr->ifr_mtu = ifp->if_mtu;
904 		break;
905 
906 	case SIOCGIFPHYS:
907 		ifr->ifr_phys = ifp->if_physical;
908 		break;
909 
910 	case SIOCGIFPOLLCPU:
911 #ifdef DEVICE_POLLING
912 		ifr->ifr_pollcpu = ifp->if_poll_cpuid;
913 #else
914 		ifr->ifr_pollcpu = -1;
915 #endif
916 		break;
917 
918 	case SIOCSIFPOLLCPU:
919 #ifdef DEVICE_POLLING
920 		if ((ifp->if_flags & IFF_POLLING) == 0)
921 			ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
922 #endif
923 		break;
924 
925 	case SIOCSIFFLAGS:
926 		error = suser_cred(cred, 0);
927 		if (error)
928 			return (error);
929 		new_flags = (ifr->ifr_flags & 0xffff) |
930 		    (ifr->ifr_flagshigh << 16);
931 		if (ifp->if_flags & IFF_SMART) {
932 			/* Smart drivers twiddle their own routes */
933 		} else if (ifp->if_flags & IFF_UP &&
934 		    (new_flags & IFF_UP) == 0) {
935 			crit_enter();
936 			if_down(ifp);
937 			crit_exit();
938 		} else if (new_flags & IFF_UP &&
939 		    (ifp->if_flags & IFF_UP) == 0) {
940 			crit_enter();
941 			if_up(ifp);
942 			crit_exit();
943 		}
944 
945 #ifdef DEVICE_POLLING
946 		if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
947 			if (new_flags & IFF_POLLING) {
948 				ether_poll_register(ifp);
949 			} else {
950 				ether_poll_deregister(ifp);
951 			}
952 		}
953 #endif
954 
955 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
956 			(new_flags &~ IFF_CANTCHANGE);
957 		if (new_flags & IFF_PPROMISC) {
958 			/* Permanently promiscuous mode requested */
959 			ifp->if_flags |= IFF_PROMISC;
960 		} else if (ifp->if_pcount == 0) {
961 			ifp->if_flags &= ~IFF_PROMISC;
962 		}
963 		if (ifp->if_ioctl) {
964 			lwkt_serialize_enter(ifp->if_serializer);
965 			ifp->if_ioctl(ifp, cmd, data, cred);
966 			lwkt_serialize_exit(ifp->if_serializer);
967 		}
968 		getmicrotime(&ifp->if_lastchange);
969 		break;
970 
971 	case SIOCSIFCAP:
972 		error = suser_cred(cred, 0);
973 		if (error)
974 			return (error);
975 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
976 			return (EINVAL);
977 		lwkt_serialize_enter(ifp->if_serializer);
978 		ifp->if_ioctl(ifp, cmd, data, cred);
979 		lwkt_serialize_exit(ifp->if_serializer);
980 		break;
981 
982 	case SIOCSIFNAME:
983 		error = suser_cred(cred, 0);
984 		if (error != 0)
985 			return (error);
986 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
987 		if (error != 0)
988 			return (error);
989 		if (new_name[0] == '\0')
990 			return (EINVAL);
991 		if (ifunit(new_name) != NULL)
992 			return (EEXIST);
993 
994 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
995 
996 		/* Announce the departure of the interface. */
997 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
998 
999 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1000 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
1001 		/* XXX IFA_LOCK(ifa); */
1002 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1003 		namelen = strlen(new_name);
1004 		onamelen = sdl->sdl_nlen;
1005 		/*
1006 		 * Move the address if needed.  This is safe because we
1007 		 * allocate space for a name of length IFNAMSIZ when we
1008 		 * create this in if_attach().
1009 		 */
1010 		if (namelen != onamelen) {
1011 			bcopy(sdl->sdl_data + onamelen,
1012 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1013 		}
1014 		bcopy(new_name, sdl->sdl_data, namelen);
1015 		sdl->sdl_nlen = namelen;
1016 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1017 		bzero(sdl->sdl_data, onamelen);
1018 		while (namelen != 0)
1019 			sdl->sdl_data[--namelen] = 0xff;
1020 		/* XXX IFA_UNLOCK(ifa) */
1021 
1022 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1023 
1024 		/* Announce the return of the interface. */
1025 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1026 		break;
1027 
1028 	case SIOCSIFMETRIC:
1029 		error = suser_cred(cred, 0);
1030 		if (error)
1031 			return (error);
1032 		ifp->if_metric = ifr->ifr_metric;
1033 		getmicrotime(&ifp->if_lastchange);
1034 		break;
1035 
1036 	case SIOCSIFPHYS:
1037 		error = suser_cred(cred, 0);
1038 		if (error)
1039 			return error;
1040 		if (!ifp->if_ioctl)
1041 		        return EOPNOTSUPP;
1042 		lwkt_serialize_enter(ifp->if_serializer);
1043 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1044 		lwkt_serialize_exit(ifp->if_serializer);
1045 		if (error == 0)
1046 			getmicrotime(&ifp->if_lastchange);
1047 		return (error);
1048 
1049 	case SIOCSIFMTU:
1050 	{
1051 		u_long oldmtu = ifp->if_mtu;
1052 
1053 		error = suser_cred(cred, 0);
1054 		if (error)
1055 			return (error);
1056 		if (ifp->if_ioctl == NULL)
1057 			return (EOPNOTSUPP);
1058 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1059 			return (EINVAL);
1060 		lwkt_serialize_enter(ifp->if_serializer);
1061 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1062 		lwkt_serialize_exit(ifp->if_serializer);
1063 		if (error == 0) {
1064 			getmicrotime(&ifp->if_lastchange);
1065 			rt_ifmsg(ifp);
1066 		}
1067 		/*
1068 		 * If the link MTU changed, do network layer specific procedure.
1069 		 */
1070 		if (ifp->if_mtu != oldmtu) {
1071 #ifdef INET6
1072 			nd6_setmtu(ifp);
1073 #endif
1074 		}
1075 		return (error);
1076 	}
1077 
1078 	case SIOCADDMULTI:
1079 	case SIOCDELMULTI:
1080 		error = suser_cred(cred, 0);
1081 		if (error)
1082 			return (error);
1083 
1084 		/* Don't allow group membership on non-multicast interfaces. */
1085 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1086 			return EOPNOTSUPP;
1087 
1088 		/* Don't let users screw up protocols' entries. */
1089 		if (ifr->ifr_addr.sa_family != AF_LINK)
1090 			return EINVAL;
1091 
1092 		if (cmd == SIOCADDMULTI) {
1093 			struct ifmultiaddr *ifma;
1094 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1095 		} else {
1096 			error = if_delmulti(ifp, &ifr->ifr_addr);
1097 		}
1098 		if (error == 0)
1099 			getmicrotime(&ifp->if_lastchange);
1100 		return error;
1101 
1102 	case SIOCSIFPHYADDR:
1103 	case SIOCDIFPHYADDR:
1104 #ifdef INET6
1105 	case SIOCSIFPHYADDR_IN6:
1106 #endif
1107 	case SIOCSLIFPHYADDR:
1108         case SIOCSIFMEDIA:
1109 	case SIOCSIFGENERIC:
1110 		error = suser_cred(cred, 0);
1111 		if (error)
1112 			return (error);
1113 		if (ifp->if_ioctl == 0)
1114 			return (EOPNOTSUPP);
1115 		lwkt_serialize_enter(ifp->if_serializer);
1116 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1117 		lwkt_serialize_exit(ifp->if_serializer);
1118 		if (error == 0)
1119 			getmicrotime(&ifp->if_lastchange);
1120 		return error;
1121 
1122 	case SIOCGIFSTATUS:
1123 		ifs = (struct ifstat *)data;
1124 		ifs->ascii[0] = '\0';
1125 
1126 	case SIOCGIFPSRCADDR:
1127 	case SIOCGIFPDSTADDR:
1128 	case SIOCGLIFPHYADDR:
1129 	case SIOCGIFMEDIA:
1130 	case SIOCGIFGENERIC:
1131 		if (ifp->if_ioctl == NULL)
1132 			return (EOPNOTSUPP);
1133 		lwkt_serialize_enter(ifp->if_serializer);
1134 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1135 		lwkt_serialize_exit(ifp->if_serializer);
1136 		return (error);
1137 
1138 	case SIOCSIFLLADDR:
1139 		error = suser_cred(cred, 0);
1140 		if (error)
1141 			return (error);
1142 		return if_setlladdr(ifp,
1143 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1144 
1145 	default:
1146 		oif_flags = ifp->if_flags;
1147 		if (so->so_proto == 0)
1148 			return (EOPNOTSUPP);
1149 #ifndef COMPAT_43
1150 		error = so_pru_control(so, cmd, data, ifp);
1151 #else
1152 	    {
1153 		int ocmd = cmd;
1154 
1155 		switch (cmd) {
1156 
1157 		case SIOCSIFDSTADDR:
1158 		case SIOCSIFADDR:
1159 		case SIOCSIFBRDADDR:
1160 		case SIOCSIFNETMASK:
1161 #if BYTE_ORDER != BIG_ENDIAN
1162 			if (ifr->ifr_addr.sa_family == 0 &&
1163 			    ifr->ifr_addr.sa_len < 16) {
1164 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1165 				ifr->ifr_addr.sa_len = 16;
1166 			}
1167 #else
1168 			if (ifr->ifr_addr.sa_len == 0)
1169 				ifr->ifr_addr.sa_len = 16;
1170 #endif
1171 			break;
1172 
1173 		case OSIOCGIFADDR:
1174 			cmd = SIOCGIFADDR;
1175 			break;
1176 
1177 		case OSIOCGIFDSTADDR:
1178 			cmd = SIOCGIFDSTADDR;
1179 			break;
1180 
1181 		case OSIOCGIFBRDADDR:
1182 			cmd = SIOCGIFBRDADDR;
1183 			break;
1184 
1185 		case OSIOCGIFNETMASK:
1186 			cmd = SIOCGIFNETMASK;
1187 		}
1188 		error =  so_pru_control(so, cmd, data, ifp);
1189 		switch (ocmd) {
1190 
1191 		case OSIOCGIFADDR:
1192 		case OSIOCGIFDSTADDR:
1193 		case OSIOCGIFBRDADDR:
1194 		case OSIOCGIFNETMASK:
1195 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1196 
1197 		}
1198 	    }
1199 #endif /* COMPAT_43 */
1200 
1201 		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1202 #ifdef INET6
1203 			DELAY(100);/* XXX: temporary workaround for fxp issue*/
1204 			if (ifp->if_flags & IFF_UP) {
1205 				crit_enter();
1206 				in6_if_up(ifp);
1207 				crit_exit();
1208 			}
1209 #endif
1210 		}
1211 		return (error);
1212 
1213 	}
1214 	return (0);
1215 }
1216 
1217 /*
1218  * Set/clear promiscuous mode on interface ifp based on the truth value
1219  * of pswitch.  The calls are reference counted so that only the first
1220  * "on" request actually has an effect, as does the final "off" request.
1221  * Results are undefined if the "off" and "on" requests are not matched.
1222  */
1223 int
1224 ifpromisc(struct ifnet *ifp, int pswitch)
1225 {
1226 	struct ifreq ifr;
1227 	int error;
1228 	int oldflags;
1229 
1230 	oldflags = ifp->if_flags;
1231 	if (ifp->if_flags & IFF_PPROMISC) {
1232 		/* Do nothing if device is in permanently promiscuous mode */
1233 		ifp->if_pcount += pswitch ? 1 : -1;
1234 		return (0);
1235 	}
1236 	if (pswitch) {
1237 		/*
1238 		 * If the device is not configured up, we cannot put it in
1239 		 * promiscuous mode.
1240 		 */
1241 		if ((ifp->if_flags & IFF_UP) == 0)
1242 			return (ENETDOWN);
1243 		if (ifp->if_pcount++ != 0)
1244 			return (0);
1245 		ifp->if_flags |= IFF_PROMISC;
1246 		log(LOG_INFO, "%s: promiscuous mode enabled\n",
1247 		    ifp->if_xname);
1248 	} else {
1249 		if (--ifp->if_pcount > 0)
1250 			return (0);
1251 		ifp->if_flags &= ~IFF_PROMISC;
1252 		log(LOG_INFO, "%s: promiscuous mode disabled\n",
1253 		    ifp->if_xname);
1254 	}
1255 	ifr.ifr_flags = ifp->if_flags;
1256 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1257 	lwkt_serialize_enter(ifp->if_serializer);
1258 	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1259 				 (struct ucred *)NULL);
1260 	lwkt_serialize_exit(ifp->if_serializer);
1261 	if (error == 0)
1262 		rt_ifmsg(ifp);
1263 	else
1264 		ifp->if_flags = oldflags;
1265 	return error;
1266 }
1267 
1268 /*
1269  * Return interface configuration
1270  * of system.  List may be used
1271  * in later ioctl's (above) to get
1272  * other information.
1273  */
1274 static int
1275 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1276 {
1277 	struct ifconf *ifc = (struct ifconf *)data;
1278 	struct ifnet *ifp;
1279 	struct ifaddr *ifa;
1280 	struct sockaddr *sa;
1281 	struct ifreq ifr, *ifrp;
1282 	int space = ifc->ifc_len, error = 0;
1283 
1284 	ifrp = ifc->ifc_req;
1285 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1286 		int addrs;
1287 
1288 		if (space <= sizeof ifr)
1289 			break;
1290 
1291 		/*
1292 		 * Zero the stack declared structure first to prevent
1293 		 * memory disclosure.
1294 		 */
1295 		bzero(&ifr, sizeof(ifr));
1296 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1297 		    >= sizeof(ifr.ifr_name)) {
1298 			error = ENAMETOOLONG;
1299 			break;
1300 		}
1301 
1302 		addrs = 0;
1303 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1304 			if (space <= sizeof ifr)
1305 				break;
1306 			sa = ifa->ifa_addr;
1307 			if (cred->cr_prison &&
1308 			    prison_if(cred, sa))
1309 				continue;
1310 			addrs++;
1311 #ifdef COMPAT_43
1312 			if (cmd == OSIOCGIFCONF) {
1313 				struct osockaddr *osa =
1314 					 (struct osockaddr *)&ifr.ifr_addr;
1315 				ifr.ifr_addr = *sa;
1316 				osa->sa_family = sa->sa_family;
1317 				error = copyout(&ifr, ifrp, sizeof ifr);
1318 				ifrp++;
1319 			} else
1320 #endif
1321 			if (sa->sa_len <= sizeof(*sa)) {
1322 				ifr.ifr_addr = *sa;
1323 				error = copyout(&ifr, ifrp, sizeof ifr);
1324 				ifrp++;
1325 			} else {
1326 				if (space < (sizeof ifr) + sa->sa_len -
1327 					    sizeof(*sa))
1328 					break;
1329 				space -= sa->sa_len - sizeof(*sa);
1330 				error = copyout(&ifr, ifrp,
1331 						sizeof ifr.ifr_name);
1332 				if (error == 0)
1333 					error = copyout(sa, &ifrp->ifr_addr,
1334 							sa->sa_len);
1335 				ifrp = (struct ifreq *)
1336 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1337 			}
1338 			if (error)
1339 				break;
1340 			space -= sizeof ifr;
1341 		}
1342 		if (error)
1343 			break;
1344 		if (!addrs) {
1345 			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
1346 			error = copyout(&ifr, ifrp, sizeof ifr);
1347 			if (error)
1348 				break;
1349 			space -= sizeof ifr;
1350 			ifrp++;
1351 		}
1352 	}
1353 	ifc->ifc_len -= space;
1354 	return (error);
1355 }
1356 
1357 /*
1358  * Just like if_promisc(), but for all-multicast-reception mode.
1359  */
1360 int
1361 if_allmulti(struct ifnet *ifp, int onswitch)
1362 {
1363 	int error = 0;
1364 	struct ifreq ifr;
1365 
1366 	crit_enter();
1367 
1368 	if (onswitch) {
1369 		if (ifp->if_amcount++ == 0) {
1370 			ifp->if_flags |= IFF_ALLMULTI;
1371 			ifr.ifr_flags = ifp->if_flags;
1372 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1373 			lwkt_serialize_enter(ifp->if_serializer);
1374 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1375 					      (struct ucred *)NULL);
1376 			lwkt_serialize_exit(ifp->if_serializer);
1377 		}
1378 	} else {
1379 		if (ifp->if_amcount > 1) {
1380 			ifp->if_amcount--;
1381 		} else {
1382 			ifp->if_amcount = 0;
1383 			ifp->if_flags &= ~IFF_ALLMULTI;
1384 			ifr.ifr_flags = ifp->if_flags;
1385 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1386 			lwkt_serialize_enter(ifp->if_serializer);
1387 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1388 					      (struct ucred *)NULL);
1389 			lwkt_serialize_exit(ifp->if_serializer);
1390 		}
1391 	}
1392 
1393 	crit_exit();
1394 
1395 	if (error == 0)
1396 		rt_ifmsg(ifp);
1397 	return error;
1398 }
1399 
1400 /*
1401  * Add a multicast listenership to the interface in question.
1402  * The link layer provides a routine which converts
1403  */
1404 int
1405 if_addmulti(
1406 	struct ifnet *ifp,	/* interface to manipulate */
1407 	struct sockaddr *sa,	/* address to add */
1408 	struct ifmultiaddr **retifma)
1409 {
1410 	struct sockaddr *llsa, *dupsa;
1411 	int error;
1412 	struct ifmultiaddr *ifma;
1413 
1414 	/*
1415 	 * If the matching multicast address already exists
1416 	 * then don't add a new one, just add a reference
1417 	 */
1418 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1419 		if (sa_equal(sa, ifma->ifma_addr)) {
1420 			ifma->ifma_refcount++;
1421 			if (retifma)
1422 				*retifma = ifma;
1423 			return 0;
1424 		}
1425 	}
1426 
1427 	/*
1428 	 * Give the link layer a chance to accept/reject it, and also
1429 	 * find out which AF_LINK address this maps to, if it isn't one
1430 	 * already.
1431 	 */
1432 	if (ifp->if_resolvemulti) {
1433 		lwkt_serialize_enter(ifp->if_serializer);
1434 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1435 		lwkt_serialize_exit(ifp->if_serializer);
1436 		if (error)
1437 			return error;
1438 	} else {
1439 		llsa = 0;
1440 	}
1441 
1442 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1443 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1444 	bcopy(sa, dupsa, sa->sa_len);
1445 
1446 	ifma->ifma_addr = dupsa;
1447 	ifma->ifma_lladdr = llsa;
1448 	ifma->ifma_ifp = ifp;
1449 	ifma->ifma_refcount = 1;
1450 	ifma->ifma_protospec = 0;
1451 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1452 
1453 	/*
1454 	 * Some network interfaces can scan the address list at
1455 	 * interrupt time; lock them out.
1456 	 */
1457 	crit_enter();
1458 	LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1459 	crit_exit();
1460 	*retifma = ifma;
1461 
1462 	if (llsa != 0) {
1463 		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1464 			if (sa_equal(ifma->ifma_addr, llsa))
1465 				break;
1466 		}
1467 		if (ifma) {
1468 			ifma->ifma_refcount++;
1469 		} else {
1470 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1471 			       M_IFMADDR, M_WAITOK);
1472 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1473 			       M_IFMADDR, M_WAITOK);
1474 			bcopy(llsa, dupsa, llsa->sa_len);
1475 			ifma->ifma_addr = dupsa;
1476 			ifma->ifma_ifp = ifp;
1477 			ifma->ifma_refcount = 1;
1478 			crit_enter();
1479 			LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1480 			crit_exit();
1481 		}
1482 	}
1483 	/*
1484 	 * We are certain we have added something, so call down to the
1485 	 * interface to let them know about it.
1486 	 */
1487 	crit_enter();
1488 	lwkt_serialize_enter(ifp->if_serializer);
1489 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0, (struct ucred *)NULL);
1490 	lwkt_serialize_exit(ifp->if_serializer);
1491 	crit_exit();
1492 
1493 	return 0;
1494 }
1495 
1496 /*
1497  * Remove a reference to a multicast address on this interface.  Yell
1498  * if the request does not match an existing membership.
1499  */
1500 int
1501 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1502 {
1503 	struct ifmultiaddr *ifma;
1504 
1505 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1506 		if (sa_equal(sa, ifma->ifma_addr))
1507 			break;
1508 	if (ifma == 0)
1509 		return ENOENT;
1510 
1511 	if (ifma->ifma_refcount > 1) {
1512 		ifma->ifma_refcount--;
1513 		return 0;
1514 	}
1515 
1516 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1517 	sa = ifma->ifma_lladdr;
1518 	crit_enter();
1519 	LIST_REMOVE(ifma, ifma_link);
1520 	/*
1521 	 * Make sure the interface driver is notified
1522 	 * in the case of a link layer mcast group being left.
1523 	 */
1524 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0) {
1525 		lwkt_serialize_enter(ifp->if_serializer);
1526 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, (struct ucred *)NULL);
1527 		lwkt_serialize_exit(ifp->if_serializer);
1528 	}
1529 	crit_exit();
1530 	kfree(ifma->ifma_addr, M_IFMADDR);
1531 	kfree(ifma, M_IFMADDR);
1532 	if (sa == 0)
1533 		return 0;
1534 
1535 	/*
1536 	 * Now look for the link-layer address which corresponds to
1537 	 * this network address.  It had been squirreled away in
1538 	 * ifma->ifma_lladdr for this purpose (so we don't have
1539 	 * to call ifp->if_resolvemulti() again), and we saved that
1540 	 * value in sa above.  If some nasty deleted the
1541 	 * link-layer address out from underneath us, we can deal because
1542 	 * the address we stored was is not the same as the one which was
1543 	 * in the record for the link-layer address.  (So we don't complain
1544 	 * in that case.)
1545 	 */
1546 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1547 		if (sa_equal(sa, ifma->ifma_addr))
1548 			break;
1549 	if (ifma == 0)
1550 		return 0;
1551 
1552 	if (ifma->ifma_refcount > 1) {
1553 		ifma->ifma_refcount--;
1554 		return 0;
1555 	}
1556 
1557 	crit_enter();
1558 	lwkt_serialize_enter(ifp->if_serializer);
1559 	LIST_REMOVE(ifma, ifma_link);
1560 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, (struct ucred *)NULL);
1561 	lwkt_serialize_exit(ifp->if_serializer);
1562 	crit_exit();
1563 	kfree(ifma->ifma_addr, M_IFMADDR);
1564 	kfree(sa, M_IFMADDR);
1565 	kfree(ifma, M_IFMADDR);
1566 
1567 	return 0;
1568 }
1569 
1570 /*
1571  * Set the link layer address on an interface.
1572  *
1573  * At this time we only support certain types of interfaces,
1574  * and we don't allow the length of the address to change.
1575  */
1576 int
1577 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1578 {
1579 	struct sockaddr_dl *sdl;
1580 	struct ifaddr *ifa;
1581 	struct ifreq ifr;
1582 
1583 	sdl = IF_LLSOCKADDR(ifp);
1584 	if (sdl == NULL)
1585 		return (EINVAL);
1586 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1587 		return (EINVAL);
1588 	switch (ifp->if_type) {
1589 	case IFT_ETHER:			/* these types use struct arpcom */
1590 	case IFT_XETHER:
1591 	case IFT_L2VLAN:
1592 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1593 		bcopy(lladdr, LLADDR(sdl), len);
1594 		break;
1595 	default:
1596 		return (ENODEV);
1597 	}
1598 	/*
1599 	 * If the interface is already up, we need
1600 	 * to re-init it in order to reprogram its
1601 	 * address filter.
1602 	 */
1603 	lwkt_serialize_enter(ifp->if_serializer);
1604 	if ((ifp->if_flags & IFF_UP) != 0) {
1605 		ifp->if_flags &= ~IFF_UP;
1606 		ifr.ifr_flags = ifp->if_flags;
1607 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1608 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1609 			      (struct ucred *)NULL);
1610 		ifp->if_flags |= IFF_UP;
1611 		ifr.ifr_flags = ifp->if_flags;
1612 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1613 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1614 				 (struct ucred *)NULL);
1615 #ifdef INET
1616 		/*
1617 		 * Also send gratuitous ARPs to notify other nodes about
1618 		 * the address change.
1619 		 */
1620 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1621 			if (ifa->ifa_addr != NULL &&
1622 			    ifa->ifa_addr->sa_family == AF_INET)
1623 				arp_ifinit(ifp, ifa);
1624 		}
1625 #endif
1626 	}
1627 	lwkt_serialize_exit(ifp->if_serializer);
1628 	return (0);
1629 }
1630 
1631 struct ifmultiaddr *
1632 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
1633 {
1634 	struct ifmultiaddr *ifma;
1635 
1636 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1637 		if (sa_equal(ifma->ifma_addr, sa))
1638 			break;
1639 
1640 	return ifma;
1641 }
1642 
1643 /*
1644  * This function locates the first real ethernet MAC from a network
1645  * card and loads it into node, returning 0 on success or ENOENT if
1646  * no suitable interfaces were found.  It is used by the uuid code to
1647  * generate a unique 6-byte number.
1648  */
1649 int
1650 if_getanyethermac(uint16_t *node, int minlen)
1651 {
1652 	struct ifnet *ifp;
1653 	struct sockaddr_dl *sdl;
1654 
1655 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1656 		if (ifp->if_type != IFT_ETHER)
1657 			continue;
1658 		sdl = IF_LLSOCKADDR(ifp);
1659 		if (sdl->sdl_alen < minlen)
1660 			continue;
1661 		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
1662 		      minlen);
1663 		return(0);
1664 	}
1665 	return (ENOENT);
1666 }
1667 
1668 /*
1669  * The name argument must be a pointer to storage which will last as
1670  * long as the interface does.  For physical devices, the result of
1671  * device_get_name(dev) is a good choice and for pseudo-devices a
1672  * static string works well.
1673  */
1674 void
1675 if_initname(struct ifnet *ifp, const char *name, int unit)
1676 {
1677 	ifp->if_dname = name;
1678 	ifp->if_dunit = unit;
1679 	if (unit != IF_DUNIT_NONE)
1680 		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1681 	else
1682 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
1683 }
1684 
1685 int
1686 if_printf(struct ifnet *ifp, const char *fmt, ...)
1687 {
1688 	__va_list ap;
1689 	int retval;
1690 
1691 	retval = kprintf("%s: ", ifp->if_xname);
1692 	__va_start(ap, fmt);
1693 	retval += kvprintf(fmt, ap);
1694 	__va_end(ap);
1695 	return (retval);
1696 }
1697 
1698 void
1699 ifq_set_classic(struct ifaltq *ifq)
1700 {
1701 	ifq->altq_enqueue = ifq_classic_enqueue;
1702 	ifq->altq_dequeue = ifq_classic_dequeue;
1703 	ifq->altq_request = ifq_classic_request;
1704 }
1705 
1706 static int
1707 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
1708 		    struct altq_pktattr *pa __unused)
1709 {
1710 	crit_enter();
1711 	if (IF_QFULL(ifq)) {
1712 		m_freem(m);
1713 		crit_exit();
1714 		return(ENOBUFS);
1715 	} else {
1716 		IF_ENQUEUE(ifq, m);
1717 		crit_exit();
1718 		return(0);
1719 	}
1720 }
1721 
1722 static struct mbuf *
1723 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
1724 {
1725 	struct mbuf *m;
1726 
1727 	crit_enter();
1728 	switch (op) {
1729 	case ALTDQ_POLL:
1730 		IF_POLL(ifq, m);
1731 		break;
1732 	case ALTDQ_REMOVE:
1733 		IF_DEQUEUE(ifq, m);
1734 		break;
1735 	default:
1736 		panic("unsupported ALTQ dequeue op: %d", op);
1737 	}
1738 	crit_exit();
1739 	KKASSERT(mpolled == NULL || mpolled == m);
1740 	return(m);
1741 }
1742 
1743 static int
1744 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
1745 {
1746 	crit_enter();
1747 	switch (req) {
1748 	case ALTRQ_PURGE:
1749 		IF_DRAIN(ifq);
1750 		break;
1751 	default:
1752 		panic("unsupported ALTQ request: %d", req);
1753 	}
1754 	crit_exit();
1755 	return(0);
1756 }
1757