xref: /dragonfly/sys/net/if.c (revision 38c2ea22)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  */
36 
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_polling.h"
41 #include "opt_ifpoll.h"
42 
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/priv.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/socketops.h>
53 #include <sys/protosw.h>
54 #include <sys/kernel.h>
55 #include <sys/ktr.h>
56 #include <sys/mutex.h>
57 #include <sys/sockio.h>
58 #include <sys/syslog.h>
59 #include <sys/sysctl.h>
60 #include <sys/domain.h>
61 #include <sys/thread.h>
62 #include <sys/serialize.h>
63 #include <sys/bus.h>
64 
65 #include <sys/thread2.h>
66 #include <sys/msgport2.h>
67 #include <sys/mutex2.h>
68 
69 #include <net/if.h>
70 #include <net/if_arp.h>
71 #include <net/if_dl.h>
72 #include <net/if_types.h>
73 #include <net/if_var.h>
74 #include <net/ifq_var.h>
75 #include <net/radix.h>
76 #include <net/route.h>
77 #include <net/if_clone.h>
78 #include <net/netisr.h>
79 #include <net/netmsg2.h>
80 
81 #include <machine/atomic.h>
82 #include <machine/stdarg.h>
83 #include <machine/smp.h>
84 
85 #if defined(INET) || defined(INET6)
86 /*XXX*/
87 #include <netinet/in.h>
88 #include <netinet/in_var.h>
89 #include <netinet/if_ether.h>
90 #ifdef INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/in6_ifattach.h>
93 #endif
94 #endif
95 
96 #if defined(COMPAT_43)
97 #include <emulation/43bsd/43bsd_socket.h>
98 #endif /* COMPAT_43 */
99 
/*
 * Netmsg that carries an interface address together with an interface.
 *
 * NOTE(review): no sender or handler of this message is visible in this
 * part of the file; the per-field notes below are inferred from the
 * field names and types only and should be confirmed against the users.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;	/* embedded netmsg header */
	struct ifaddr	*ifa;		/* presumably the address being operated on */
	struct ifnet	*ifp;		/* presumably the interface involved */
	int		tail;		/* presumably tail vs. head insertion -- TODO confirm */
};
106 
/*
 * Forward declarations of file-local routines: the SYSINIT hooks
 * (ifinit, ifnetinit, if_attachdomain), the slow timer, and the routing
 * helpers used by if_attach()/if_detach().
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6 related header
 * files.  Should this be generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
126 
/* sysctl tree nodes: net.link and net.link.generic */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Register ifinit() and ifnetinit() as boot-time initialization hooks */
SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)

/*
 * Tables of if_com allocation/free callbacks with 256 slots each.
 * NOTE(review): presumably indexed by interface type; the users are not
 * visible in this part of the file -- confirm.
 */
static  if_com_alloc_t *if_com_alloc[256];
static  if_com_free_t *if_com_free[256];

/* kmalloc types used by the interface layer */
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

/* Send queue length that ifinit() assigns when a driver left it at 0 */
int			ifqmaxlen = IFQ_MAXLEN;
/* List of attached interfaces; if_attach() appends, if_detach() removes */
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

/* Callout initialized by ifinit() before it calls if_slowtimo() */
struct callout		if_slowtimo_timer;

/*
 * if_index is the highest interface index handed out so far; if_attach()
 * pre-increments it and if_detach() lowers it past trailing empty slots.
 * ifindex2ifnet[] maps an interface index to its ifnet; a slot is set to
 * NULL by if_detach(), so entries at or below if_index may be NULL.
 */
int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
/* One thread structure per possible CPU (not used in the visible code) */
static struct thread	ifnet_threads[MAXCPU];
149 
/*
 * KTR trace points for the interface send queue.  Each event records
 * the ifaltq pointer ("ifq=%p").  KTR_IFQ defaults to KTR_ALL unless it
 * was already defined.  logifq(name, arg) expands to
 * KTR_LOG(ifq_<name>, arg).
 */
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS	struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

/*
 * KTR trace points for the if_start scheduling code below.  Each event
 * records the ifnet pointer ("ifp=%p"); the events are run, sched,
 * avoid, contend_sched and (SMP only) chase_sched.
 * logifstart(name, arg) expands to KTR_LOG(if_start_<name>, arg).
 */
#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#ifdef SMP
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#endif
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
179 
/*
 * List of all interface groups.  if_creategroup() appends to it and
 * if_delgroup() removes a group once its reference count drops to zero.
 */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
181 
182 /*
183  * Network interface utility routines.
184  *
185  * Routines with ifa_ifwith* names take sockaddr *'s as
186  * parameters.
187  */
188 /* ARGSUSED*/
189 void
190 ifinit(void *dummy)
191 {
192 	struct ifnet *ifp;
193 
194 	callout_init(&if_slowtimo_timer);
195 
196 	crit_enter();
197 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
198 		if (ifp->if_snd.ifq_maxlen == 0) {
199 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
200 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
201 		}
202 	}
203 	crit_exit();
204 
205 	if_slowtimo(0);
206 }
207 
208 static int
209 if_start_cpuid(struct ifnet *ifp)
210 {
211 	return ifp->if_cpuid;
212 }
213 
#ifdef DEVICE_POLLING
/*
 * ifnet.if_start_cpuid method for interfaces that provide if_poll:
 * a non-negative if_poll_cpuid takes precedence, otherwise the
 * interface's if_cpuid is reported.
 */
static int
if_start_cpuid_poll(struct ifnet *ifp)
{
	int cpu = ifp->if_poll_cpuid;

	return (cpu < 0 ? ifp->if_cpuid : cpu);
}
#endif
226 
227 static void
228 if_start_ipifunc(void *arg)
229 {
230 	struct ifnet *ifp = arg;
231 	struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
232 
233 	crit_enter();
234 	if (lmsg->ms_flags & MSGF_DONE)
235 		lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
236 	crit_exit();
237 }
238 
/*
 * Schedule ifnet.if_start on the CPU reported by ifnet.if_start_cpuid.
 *
 * On SMP kernels a remote target CPU is reached with an IPI that runs
 * if_start_ipifunc() there; in every other case if_start_ipifunc() is
 * called directly on the current CPU.
 */
static void
if_start_schedule(struct ifnet *ifp)
{
#ifdef SMP
	int target = ifp->if_start_cpuid(ifp);

	if (target != mycpuid) {
		lwkt_send_ipiq(globaldata_find(target), if_start_ipifunc, ifp);
		return;
	}
#endif
	if_start_ipifunc(ifp);
}
255 
256 /*
257  * NOTE:
258  * This function will release ifnet.if_start interlock,
259  * if ifnet.if_start does not need to be scheduled
260  */
261 static __inline int
262 if_start_need_schedule(struct ifaltq *ifq, int running)
263 {
264 	if (!running || ifq_is_empty(ifq)
265 #ifdef ALTQ
266 	    || ifq->altq_tbr != NULL
267 #endif
268 	) {
269 		ALTQ_LOCK(ifq);
270 		/*
271 		 * ifnet.if_start interlock is released, if:
272 		 * 1) Hardware can not take any packets, due to
273 		 *    o  interface is marked down
274 		 *    o  hardware queue is full (IFF_OACTIVE)
275 		 *    Under the second situation, hardware interrupt
276 		 *    or polling(4) will call/schedule ifnet.if_start
277 		 *    when hardware queue is ready
278 		 * 2) There is not packet in the ifnet.if_snd.
279 		 *    Further ifq_dispatch or ifq_handoff will call/
280 		 *    schedule ifnet.if_start
281 		 * 3) TBR is used and it does not allow further
282 		 *    dequeueing.
283 		 *    TBR callout will call ifnet.if_start
284 		 */
285 		if (!running || !ifq_data_ready(ifq)) {
286 			ifq->altq_started = 0;
287 			ALTQ_UNLOCK(ifq);
288 			return 0;
289 		}
290 		ALTQ_UNLOCK(ifq);
291 	}
292 	return 1;
293 }
294 
/*
 * Netmsg handler (installed by if_attach() on every if_start_nmsg[])
 * that runs ifnet.if_start for the interface stored in the message's
 * u.ms_resultp.
 *
 * The message is replied to immediately.  If the interface is up and
 * not IFF_OACTIVE, if_start is invoked with the TX serializer held.
 * Afterwards if_start_need_schedule() decides whether the message is
 * sent again to the current CPU's ifnet port.
 */
static void
if_start_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifnet *ifp = lmsg->u.ms_resultp;
	struct ifaltq *ifq = &ifp->if_snd;
	int running = 0;	/* set to 1 only if hardware can take more */

	crit_enter();
	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
	crit_exit();

#ifdef SMP
	if (mycpuid != ifp->if_start_cpuid(ifp)) {
		/*
		 * If the ifnet is still up, we need to
		 * chase its CPU change.
		 */
		if (ifp->if_flags & IFF_UP) {
			logifstart(chase_sched, ifp);
			if_start_schedule(ifp);
			return;
		} else {
			/* Interface is down; 'running' stays 0 */
			goto check;
		}
	}
#endif

	if (ifp->if_flags & IFF_UP) {
		ifnet_serialize_tx(ifp); /* XXX try? */
		if ((ifp->if_flags & IFF_OACTIVE) == 0) {
			logifstart(run, ifp);
			ifp->if_start(ifp);
			/* Running and not OACTIVE: more packets may be sent */
			if ((ifp->if_flags &
			(IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
				running = 1;
		}
		ifnet_deserialize_tx(ifp);
	}
#ifdef SMP
check:
#endif
	/* Returns 0 (and releases the interlock) when nothing is left to do */
	if (if_start_need_schedule(ifq, running)) {
		crit_enter();
		if (lmsg->ms_flags & MSGF_DONE)	{ /* XXX necessary? */
			logifstart(sched, ifp);
			lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
		}
		crit_exit();
	}
}
346 
347 /* Device driver ifnet.if_start helper function */
348 void
349 if_devstart(struct ifnet *ifp)
350 {
351 	struct ifaltq *ifq = &ifp->if_snd;
352 	int running = 0;
353 
354 	ASSERT_IFNET_SERIALIZED_TX(ifp);
355 
356 	ALTQ_LOCK(ifq);
357 	if (ifq->altq_started || !ifq_data_ready(ifq)) {
358 		logifstart(avoid, ifp);
359 		ALTQ_UNLOCK(ifq);
360 		return;
361 	}
362 	ifq->altq_started = 1;
363 	ALTQ_UNLOCK(ifq);
364 
365 	logifstart(run, ifp);
366 	ifp->if_start(ifp);
367 
368 	if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
369 		running = 1;
370 
371 	if (if_start_need_schedule(ifq, running)) {
372 		/*
373 		 * More data need to be transmitted, ifnet.if_start is
374 		 * scheduled on ifnet's CPU, and we keep going.
375 		 * NOTE: ifnet.if_start interlock is not released.
376 		 */
377 		logifstart(sched, ifp);
378 		if_start_schedule(ifp);
379 	}
380 }
381 
382 static void
383 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
384 {
385 	lwkt_serialize_enter(ifp->if_serializer);
386 }
387 
388 static void
389 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
390 {
391 	lwkt_serialize_exit(ifp->if_serializer);
392 }
393 
394 static int
395 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
396 {
397 	return lwkt_serialize_try(ifp->if_serializer);
398 }
399 
#ifdef INVARIANTS
/*
 * Default ifnet.if_serialize_assert method: assert that the interface's
 * single serializer is held ('serialized' true) or is not held
 * ('serialized' false).  The 'slz' selector is ignored.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (!serialized) {
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
		return;
	}
	ASSERT_SERIALIZED(ifp->if_serializer);
}
#endif
412 
/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.  If non-NULL access to the interface
 * may be MPSAFE.
 *
 * A driver either supplies its own complete set of serialize methods
 * (then 'serializer' must be NULL) or none at all, in which case the
 * default methods are installed and operate on 'serializer' or, if that
 * is NULL, on the serializer embedded in the ifnet.
 *
 * Besides linking ifp into the global interface list this assigns the
 * interface index, sets up the per-CPU if_start messages and address
 * lists, creates the AF_LINK address and initializes the send queue.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	int i;

	/* Current capacity of ifindex2ifnet[]; doubled before each (re)allocation */
	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/* By default if_start runs on CPU 0 (if_cpuid) */
	ifp->if_start_cpuid = if_start_cpuid;
	ifp->if_cpuid = 0;

#ifdef DEVICE_POLLING
	/* Device is not in polling mode by default */
	ifp->if_poll_cpuid = -1;
	if (ifp->if_poll != NULL)
		ifp->if_start_cpuid = if_start_cpuid_poll;
#endif

	/* One if_start netmsg per CPU, each carrying ifp in u.ms_resultp */
	ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
				     M_LWKTMSG, M_WAITOK);
	for (i = 0; i < ncpus; ++i) {
		netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
			    0, if_start_dispatch);
		ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
	}

	/* The ioctl mutex is held until the end of the attach */
	mtx_init(&ifp->if_ioctl_mtx);
	mtx_lock(&ifp->if_ioctl_mtx);

	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	ifp->if_index = ++if_index;

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	/* Allocate ifindex2ifnet[] on first use, or grow it when full */
	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		if_indexlim <<= 1;

		/* grow ifindex2ifnet */
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet) {
			/* n/2 is the byte size of the old, half as large array */
			bcopy(ifindex2ifnet, q, n/2);
			kfree(ifindex2ifnet, M_IFADDR);
		}
		ifindex2ifnet = q;
	}

	ifindex2ifnet[if_index] = ifp;

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	/* Round up to the next multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP
	/* The ifaddr is followed by two sockaddr_dl's: address and netmask */
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* The netmask sockaddr_dl directly follows the address */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	/* The mask covers exactly the interface name bytes */
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Reset the send queue; only the ALTQF_CANTCHANGE flag bits survive */
	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;
	ifq->altq_started = 0;
	ifq->altq_prepended = NULL;
	ALTQ_LOCK_INIT(ifq);
	ifq_set_classic(ifq);

	/*
	 * Attach domain data right away if domains are already registered;
	 * otherwise the if_attachdomain() SYSINIT hook does it later for
	 * every interface on the list.
	 */
	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	mtx_unlock(&ifp->if_ioctl_mtx);
}
583 
584 static void
585 if_attachdomain(void *dummy)
586 {
587 	struct ifnet *ifp;
588 
589 	crit_enter();
590 	TAILQ_FOREACH(ifp, &ifnet, if_list)
591 		if_attachdomain1(ifp);
592 	crit_exit();
593 }
594 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
595 	if_attachdomain, NULL);
596 
597 static void
598 if_attachdomain1(struct ifnet *ifp)
599 {
600 	struct domain *dp;
601 
602 	crit_enter();
603 
604 	/* address family dependent data region */
605 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
606 	SLIST_FOREACH(dp, &domains, dom_next)
607 		if (dp->dom_ifattach)
608 			ifp->if_afdata[dp->dom_family] =
609 				(*dp->dom_ifattach)(ifp);
610 	crit_exit();
611 }
612 
613 /*
614  * Purge all addresses whose type is _not_ AF_LINK
615  */
616 void
617 if_purgeaddrs_nolink(struct ifnet *ifp)
618 {
619 	struct ifaddr_container *ifac, *next;
620 
621 	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
622 			      ifa_link, next) {
623 		struct ifaddr *ifa = ifac->ifa;
624 
625 		/* Leave link ifaddr as it is */
626 		if (ifa->ifa_addr->sa_family == AF_LINK)
627 			continue;
628 #ifdef INET
629 		/* XXX: Ugly!! ad hoc just for INET */
630 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
631 			struct ifaliasreq ifr;
632 #ifdef IFADDR_DEBUG_VERBOSE
633 			int i;
634 
635 			kprintf("purge in4 addr %p: ", ifa);
636 			for (i = 0; i < ncpus; ++i)
637 				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
638 			kprintf("\n");
639 #endif
640 
641 			bzero(&ifr, sizeof ifr);
642 			ifr.ifra_addr = *ifa->ifa_addr;
643 			if (ifa->ifa_dstaddr)
644 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
645 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
646 				       NULL) == 0)
647 				continue;
648 		}
649 #endif /* INET */
650 #ifdef INET6
651 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
652 #ifdef IFADDR_DEBUG_VERBOSE
653 			int i;
654 
655 			kprintf("purge in6 addr %p: ", ifa);
656 			for (i = 0; i < ncpus; ++i)
657 				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
658 			kprintf("\n");
659 #endif
660 
661 			in6_purgeaddr(ifa);
662 			/* ifp_addrhead is already updated */
663 			continue;
664 		}
665 #endif /* INET6 */
666 		ifa_ifunlink(ifa, ifp);
667 		ifa_destroy(ifa);
668 	}
669 }
670 
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * In order: detach event handlers are invoked, polling is deregistered,
 * the interface is brought down, ALTQ is disabled/detached, all
 * addresses are purged, IPv4/IPv6 state is removed, the routing tables
 * of every CPU are scanned for routes through this interface, the
 * departure is announced, the per-domain data is detached, and finally
 * the index slot, list linkage and per-interface allocations made by
 * if_attach() are released.
 */
void
if_detach(struct ifnet *ifp)
{
	struct radix_node_head	*rnh;
	int i;
	int cpu, origcpu;
	struct domain *dp;

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef DEVICE_POLLING
	if (ifp->if_flags & IFF_POLLING)
		ether_poll_deregister(ifp);
#endif
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	/* Only the link-level address may be left at this point */
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface.
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	origcpu = mycpuid;
	/* Visit each CPU's routing tables while running on that CPU */
	for (cpu = 0; cpu < ncpus2; cpu++) {
		lwkt_migratecpu(cpu);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[cpu][i]) == NULL)
				continue;
			rnh->rnh_walktree(rnh, if_rtdel, ifp);
		}
	}
	lwkt_migratecpu(origcpu);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/* Let every domain that attached data to this interface detach it */
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);

	/*
	 * Remove interface from ifindex2ifnet[] and maybe decrement if_index.
	 * Only trailing empty slots lower if_index, so a slot below if_index
	 * can remain NULL.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	TAILQ_REMOVE(&ifnet, ifp, if_link);
	/* Release what if_attach() allocated */
	kfree(ifp->if_addrheads, M_IFADDR);
	kfree(ifp->if_start_nmsg, M_LWKTMSG);
	crit_exit();
}
780 
781 /*
782  * Create interface group without members
783  */
784 struct ifg_group *
785 if_creategroup(const char *groupname)
786 {
787         struct ifg_group        *ifg = NULL;
788 
789         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
790             M_TEMP, M_NOWAIT)) == NULL)
791                 return (NULL);
792 
793         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
794         ifg->ifg_refcnt = 0;
795         ifg->ifg_carp_demoted = 0;
796         TAILQ_INIT(&ifg->ifg_members);
797 #if NPF > 0
798         pfi_attach_ifgroup(ifg);
799 #endif
800         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
801 
802         return (ifg);
803 }
804 
805 /*
806  * Add a group to an interface
807  */
808 int
809 if_addgroup(struct ifnet *ifp, const char *groupname)
810 {
811 	struct ifg_list		*ifgl;
812 	struct ifg_group	*ifg = NULL;
813 	struct ifg_member	*ifgm;
814 
815 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
816 	    groupname[strlen(groupname) - 1] <= '9')
817 		return (EINVAL);
818 
819 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
820 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
821 			return (EEXIST);
822 
823 	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
824 		return (ENOMEM);
825 
826 	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
827 		kfree(ifgl, M_TEMP);
828 		return (ENOMEM);
829 	}
830 
831 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
832 		if (!strcmp(ifg->ifg_group, groupname))
833 			break;
834 
835 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
836 		kfree(ifgl, M_TEMP);
837 		kfree(ifgm, M_TEMP);
838 		return (ENOMEM);
839 	}
840 
841 	ifg->ifg_refcnt++;
842 	ifgl->ifgl_group = ifg;
843 	ifgm->ifgm_ifp = ifp;
844 
845 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
846 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
847 
848 #if NPF > 0
849 	pfi_group_change(groupname);
850 #endif
851 
852 	return (0);
853 }
854 
855 /*
856  * Remove a group from an interface
857  */
858 int
859 if_delgroup(struct ifnet *ifp, const char *groupname)
860 {
861 	struct ifg_list		*ifgl;
862 	struct ifg_member	*ifgm;
863 
864 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
865 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
866 			break;
867 	if (ifgl == NULL)
868 		return (ENOENT);
869 
870 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
871 
872 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
873 		if (ifgm->ifgm_ifp == ifp)
874 			break;
875 
876 	if (ifgm != NULL) {
877 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
878 		kfree(ifgm, M_TEMP);
879 	}
880 
881 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
882 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
883 #if NPF > 0
884 		pfi_detach_ifgroup(ifgl->ifgl_group);
885 #endif
886 		kfree(ifgl->ifgl_group, M_TEMP);
887 	}
888 
889 	kfree(ifgl, M_TEMP);
890 
891 #if NPF > 0
892 	pfi_group_change(groupname);
893 #endif
894 
895 	return (0);
896 }
897 
898 /*
899  * Stores all groups from an interface in memory pointed
900  * to by data
901  */
902 int
903 if_getgroup(caddr_t data, struct ifnet *ifp)
904 {
905 	int			 len, error;
906 	struct ifg_list		*ifgl;
907 	struct ifg_req		 ifgrq, *ifgp;
908 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
909 
910 	if (ifgr->ifgr_len == 0) {
911 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
912 			ifgr->ifgr_len += sizeof(struct ifg_req);
913 		return (0);
914 	}
915 
916 	len = ifgr->ifgr_len;
917 	ifgp = ifgr->ifgr_groups;
918 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
919 		if (len < sizeof(ifgrq))
920 			return (EINVAL);
921 		bzero(&ifgrq, sizeof ifgrq);
922 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
923 		    sizeof(ifgrq.ifgrq_group));
924 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
925 		    sizeof(struct ifg_req))))
926 			return (error);
927 		len -= sizeof(ifgrq);
928 		ifgp++;
929 	}
930 
931 	return (0);
932 }
933 
934 /*
935  * Stores all members of a group in memory pointed to by data
936  */
937 int
938 if_getgroupmembers(caddr_t data)
939 {
940 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
941 	struct ifg_group	*ifg;
942 	struct ifg_member	*ifgm;
943 	struct ifg_req		 ifgrq, *ifgp;
944 	int			 len, error;
945 
946 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
947 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
948 			break;
949 	if (ifg == NULL)
950 		return (ENOENT);
951 
952 	if (ifgr->ifgr_len == 0) {
953 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
954 			ifgr->ifgr_len += sizeof(ifgrq);
955 		return (0);
956 	}
957 
958 	len = ifgr->ifgr_len;
959 	ifgp = ifgr->ifgr_groups;
960 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
961 		if (len < sizeof(ifgrq))
962 			return (EINVAL);
963 		bzero(&ifgrq, sizeof ifgrq);
964 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
965 		    sizeof(ifgrq.ifgrq_member));
966 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
967 		    sizeof(struct ifg_req))))
968 			return (error);
969 		len -= sizeof(ifgrq);
970 		ifgp++;
971 	}
972 
973 	return (0);
974 }
975 
976 /*
977  * Delete Routes for a Network Interface
978  *
979  * Called for each routing entry via the rnh->rnh_walktree() call above
980  * to delete all route entries referencing a detaching network interface.
981  *
982  * Arguments:
983  *	rn	pointer to node in the routing table
984  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
985  *
986  * Returns:
987  *	0	successful
988  *	errno	failed - reason indicated
989  *
990  */
991 static int
992 if_rtdel(struct radix_node *rn, void *arg)
993 {
994 	struct rtentry	*rt = (struct rtentry *)rn;
995 	struct ifnet	*ifp = arg;
996 	int		err;
997 
998 	if (rt->rt_ifp == ifp) {
999 
1000 		/*
1001 		 * Protect (sorta) against walktree recursion problems
1002 		 * with cloned routes
1003 		 */
1004 		if (!(rt->rt_flags & RTF_UP))
1005 			return (0);
1006 
1007 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1008 				rt_mask(rt), rt->rt_flags,
1009 				NULL);
1010 		if (err) {
1011 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
1012 		}
1013 	}
1014 
1015 	return (0);
1016 }
1017 
1018 /*
1019  * Locate an interface based on a complete address.
1020  */
1021 struct ifaddr *
1022 ifa_ifwithaddr(struct sockaddr *addr)
1023 {
1024 	struct ifnet *ifp;
1025 
1026 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1027 		struct ifaddr_container *ifac;
1028 
1029 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1030 			struct ifaddr *ifa = ifac->ifa;
1031 
1032 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1033 				continue;
1034 			if (sa_equal(addr, ifa->ifa_addr))
1035 				return (ifa);
1036 			if ((ifp->if_flags & IFF_BROADCAST) &&
1037 			    ifa->ifa_broadaddr &&
1038 			    /* IPv6 doesn't have broadcast */
1039 			    ifa->ifa_broadaddr->sa_len != 0 &&
1040 			    sa_equal(ifa->ifa_broadaddr, addr))
1041 				return (ifa);
1042 		}
1043 	}
1044 	return (NULL);
1045 }
1046 /*
1047  * Locate the point to point interface with a given destination address.
1048  */
1049 struct ifaddr *
1050 ifa_ifwithdstaddr(struct sockaddr *addr)
1051 {
1052 	struct ifnet *ifp;
1053 
1054 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1055 		struct ifaddr_container *ifac;
1056 
1057 		if (!(ifp->if_flags & IFF_POINTOPOINT))
1058 			continue;
1059 
1060 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1061 			struct ifaddr *ifa = ifac->ifa;
1062 
1063 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1064 				continue;
1065 			if (ifa->ifa_dstaddr &&
1066 			    sa_equal(addr, ifa->ifa_dstaddr))
1067 				return (ifa);
1068 		}
1069 	}
1070 	return (NULL);
1071 }
1072 
1073 /*
1074  * Find an interface on a specific network.  If many, choice
1075  * is most specific found.
1076  */
1077 struct ifaddr *
1078 ifa_ifwithnet(struct sockaddr *addr)
1079 {
1080 	struct ifnet *ifp;
1081 	struct ifaddr *ifa_maybe = NULL;
1082 	u_int af = addr->sa_family;
1083 	char *addr_data = addr->sa_data, *cplim;
1084 
1085 	/*
1086 	 * AF_LINK addresses can be looked up directly by their index number,
1087 	 * so do that if we can.
1088 	 */
1089 	if (af == AF_LINK) {
1090 		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1091 
1092 		if (sdl->sdl_index && sdl->sdl_index <= if_index)
1093 			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1094 	}
1095 
1096 	/*
1097 	 * Scan though each interface, looking for ones that have
1098 	 * addresses in this address family.
1099 	 */
1100 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1101 		struct ifaddr_container *ifac;
1102 
1103 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1104 			struct ifaddr *ifa = ifac->ifa;
1105 			char *cp, *cp2, *cp3;
1106 
1107 			if (ifa->ifa_addr->sa_family != af)
1108 next:				continue;
1109 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1110 				/*
1111 				 * This is a bit broken as it doesn't
1112 				 * take into account that the remote end may
1113 				 * be a single node in the network we are
1114 				 * looking for.
1115 				 * The trouble is that we don't know the
1116 				 * netmask for the remote end.
1117 				 */
1118 				if (ifa->ifa_dstaddr != NULL &&
1119 				    sa_equal(addr, ifa->ifa_dstaddr))
1120 					return (ifa);
1121 			} else {
1122 				/*
1123 				 * if we have a special address handler,
1124 				 * then use it instead of the generic one.
1125 				 */
1126 				if (ifa->ifa_claim_addr) {
1127 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1128 						return (ifa);
1129 					} else {
1130 						continue;
1131 					}
1132 				}
1133 
1134 				/*
1135 				 * Scan all the bits in the ifa's address.
1136 				 * If a bit dissagrees with what we are
1137 				 * looking for, mask it with the netmask
1138 				 * to see if it really matters.
1139 				 * (A byte at a time)
1140 				 */
1141 				if (ifa->ifa_netmask == 0)
1142 					continue;
1143 				cp = addr_data;
1144 				cp2 = ifa->ifa_addr->sa_data;
1145 				cp3 = ifa->ifa_netmask->sa_data;
1146 				cplim = ifa->ifa_netmask->sa_len +
1147 					(char *)ifa->ifa_netmask;
1148 				while (cp3 < cplim)
1149 					if ((*cp++ ^ *cp2++) & *cp3++)
1150 						goto next; /* next address! */
1151 				/*
1152 				 * If the netmask of what we just found
1153 				 * is more specific than what we had before
1154 				 * (if we had one) then remember the new one
1155 				 * before continuing to search
1156 				 * for an even better one.
1157 				 */
1158 				if (ifa_maybe == NULL ||
1159 				    rn_refines((char *)ifa->ifa_netmask,
1160 					       (char *)ifa_maybe->ifa_netmask))
1161 					ifa_maybe = ifa;
1162 			}
1163 		}
1164 	}
1165 	return (ifa_maybe);
1166 }
1167 
1168 /*
1169  * Find an interface address specific to an interface best matching
1170  * a given address.
1171  */
1172 struct ifaddr *
1173 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1174 {
1175 	struct ifaddr_container *ifac;
1176 	char *cp, *cp2, *cp3;
1177 	char *cplim;
1178 	struct ifaddr *ifa_maybe = NULL;
1179 	u_int af = addr->sa_family;
1180 
1181 	if (af >= AF_MAX)
1182 		return (0);
1183 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1184 		struct ifaddr *ifa = ifac->ifa;
1185 
1186 		if (ifa->ifa_addr->sa_family != af)
1187 			continue;
1188 		if (ifa_maybe == NULL)
1189 			ifa_maybe = ifa;
1190 		if (ifa->ifa_netmask == NULL) {
1191 			if (sa_equal(addr, ifa->ifa_addr) ||
1192 			    (ifa->ifa_dstaddr != NULL &&
1193 			     sa_equal(addr, ifa->ifa_dstaddr)))
1194 				return (ifa);
1195 			continue;
1196 		}
1197 		if (ifp->if_flags & IFF_POINTOPOINT) {
1198 			if (sa_equal(addr, ifa->ifa_dstaddr))
1199 				return (ifa);
1200 		} else {
1201 			cp = addr->sa_data;
1202 			cp2 = ifa->ifa_addr->sa_data;
1203 			cp3 = ifa->ifa_netmask->sa_data;
1204 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1205 			for (; cp3 < cplim; cp3++)
1206 				if ((*cp++ ^ *cp2++) & *cp3)
1207 					break;
1208 			if (cp3 == cplim)
1209 				return (ifa);
1210 		}
1211 	}
1212 	return (ifa_maybe);
1213 }
1214 
1215 /*
1216  * Default action when installing a route with a Link Level gateway.
1217  * Lookup an appropriate real ifa to point to.
1218  * This should be moved to /sys/net/link.c eventually.
1219  */
1220 static void
1221 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1222 {
1223 	struct ifaddr *ifa;
1224 	struct sockaddr *dst;
1225 	struct ifnet *ifp;
1226 
1227 	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1228 	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1229 		return;
1230 	ifa = ifaof_ifpforaddr(dst, ifp);
1231 	if (ifa != NULL) {
1232 		IFAFREE(rt->rt_ifa);
1233 		IFAREF(ifa);
1234 		rt->rt_ifa = ifa;
1235 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1236 			ifa->ifa_rtrequest(cmd, rt, info);
1237 	}
1238 }
1239 
1240 /*
1241  * Mark an interface down and notify protocols of
1242  * the transition.
1243  * NOTE: must be called at splnet or eqivalent.
1244  */
1245 void
1246 if_unroute(struct ifnet *ifp, int flag, int fam)
1247 {
1248 	struct ifaddr_container *ifac;
1249 
1250 	ifp->if_flags &= ~flag;
1251 	getmicrotime(&ifp->if_lastchange);
1252 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1253 		struct ifaddr *ifa = ifac->ifa;
1254 
1255 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1256 			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1257 	}
1258 	ifq_purge(&ifp->if_snd);
1259 	rt_ifmsg(ifp);
1260 }
1261 
1262 /*
1263  * Mark an interface up and notify protocols of
1264  * the transition.
1265  * NOTE: must be called at splnet or eqivalent.
1266  */
1267 void
1268 if_route(struct ifnet *ifp, int flag, int fam)
1269 {
1270 	struct ifaddr_container *ifac;
1271 
1272 	ifq_purge(&ifp->if_snd);
1273 	ifp->if_flags |= flag;
1274 	getmicrotime(&ifp->if_lastchange);
1275 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1276 		struct ifaddr *ifa = ifac->ifa;
1277 
1278 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1279 			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1280 	}
1281 	rt_ifmsg(ifp);
1282 #ifdef INET6
1283 	in6_if_up(ifp);
1284 #endif
1285 }
1286 
/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	/* Clear IFF_UP and notify protocols for every address family. */
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
	/*
	 * Presumably this is what waits for in-flight packet processing
	 * as promised above -- confirm against netmsg_service_sync().
	 */
	netmsg_service_sync();
}
1302 
/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	/* Set IFF_UP and notify protocols for every address family. */
	if_route(ifp, IFF_UP, AF_UNSPEC);
}
1313 
1314 /*
1315  * Process a link state change.
1316  * NOTE: must be called at splsoftnet or equivalent.
1317  */
1318 void
1319 if_link_state_change(struct ifnet *ifp)
1320 {
1321 	int link_state = ifp->if_link_state;
1322 
1323 	rt_ifmsg(ifp);
1324 	devctl_notify("IFNET", ifp->if_xname,
1325 	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1326 }
1327 
1328 /*
1329  * Handle interface watchdog timer routines.  Called
1330  * from softclock, we decrement timers (if set) and
1331  * call the appropriate interface routine on expiration.
1332  */
1333 static void
1334 if_slowtimo(void *arg)
1335 {
1336 	struct ifnet *ifp;
1337 
1338 	crit_enter();
1339 
1340 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1341 		if (ifp->if_timer == 0 || --ifp->if_timer)
1342 			continue;
1343 		if (ifp->if_watchdog) {
1344 			if (ifnet_tryserialize_all(ifp)) {
1345 				(*ifp->if_watchdog)(ifp);
1346 				ifnet_deserialize_all(ifp);
1347 			} else {
1348 				/* try again next timeout */
1349 				++ifp->if_timer;
1350 			}
1351 		}
1352 	}
1353 
1354 	crit_exit();
1355 
1356 	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1357 }
1358 
1359 /*
1360  * Map interface name to
1361  * interface structure pointer.
1362  */
1363 struct ifnet *
1364 ifunit(const char *name)
1365 {
1366 	struct ifnet *ifp;
1367 
1368 	/*
1369 	 * Search all the interfaces for this name/number
1370 	 */
1371 
1372 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1373 		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1374 			break;
1375 	}
1376 	return (ifp);
1377 }
1378 
1379 
1380 /*
1381  * Map interface name in a sockaddr_dl to
1382  * interface structure pointer.
1383  */
1384 struct ifnet *
1385 if_withname(struct sockaddr *sa)
1386 {
1387 	char ifname[IFNAMSIZ+1];
1388 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1389 
1390 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1391 	     (sdl->sdl_nlen > IFNAMSIZ) )
1392 		return NULL;
1393 
1394 	/*
1395 	 * ifunit wants a null-terminated name.  It may not be null-terminated
1396 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1397 	 * and there might not be room to put the trailing null anyway, so we
1398 	 * make a local copy that we know we can null terminate safely.
1399 	 */
1400 
1401 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1402 	ifname[sdl->sdl_nlen] = '\0';
1403 	return ifunit(ifname);
1404 }
1405 
1406 
1407 /*
1408  * Interface ioctls.
1409  */
1410 int
1411 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1412 {
1413 	struct ifnet *ifp;
1414 	struct ifreq *ifr;
1415 	struct ifstat *ifs;
1416 	int error;
1417 	short oif_flags;
1418 	int new_flags;
1419 #ifdef COMPAT_43
1420 	int ocmd;
1421 #endif
1422 	size_t namelen, onamelen;
1423 	char new_name[IFNAMSIZ];
1424 	struct ifaddr *ifa;
1425 	struct sockaddr_dl *sdl;
1426 
1427 	switch (cmd) {
1428 	case SIOCGIFCONF:
1429 	case OSIOCGIFCONF:
1430 		return (ifconf(cmd, data, cred));
1431 	default:
1432 		break;
1433 	}
1434 
1435 	ifr = (struct ifreq *)data;
1436 
1437 	switch (cmd) {
1438 	case SIOCIFCREATE:
1439 	case SIOCIFCREATE2:
1440 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1441 			return (error);
1442 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1443 		    	cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1444 	case SIOCIFDESTROY:
1445 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1446 			return (error);
1447 		return (if_clone_destroy(ifr->ifr_name));
1448 	case SIOCIFGCLONERS:
1449 		return (if_clone_list((struct if_clonereq *)data));
1450 	default:
1451 		break;
1452 	}
1453 
1454 	/*
1455 	 * Nominal ioctl through interface, lookup the ifp and obtain a
1456 	 * lock to serialize the ifconfig ioctl operation.
1457 	 */
1458 	ifp = ifunit(ifr->ifr_name);
1459 	if (ifp == NULL)
1460 		return (ENXIO);
1461 	error = 0;
1462 	mtx_lock(&ifp->if_ioctl_mtx);
1463 
1464 	switch (cmd) {
1465 	case SIOCGIFINDEX:
1466 		ifr->ifr_index = ifp->if_index;
1467 		break;
1468 
1469 	case SIOCGIFFLAGS:
1470 		ifr->ifr_flags = ifp->if_flags;
1471 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1472 		break;
1473 
1474 	case SIOCGIFCAP:
1475 		ifr->ifr_reqcap = ifp->if_capabilities;
1476 		ifr->ifr_curcap = ifp->if_capenable;
1477 		break;
1478 
1479 	case SIOCGIFMETRIC:
1480 		ifr->ifr_metric = ifp->if_metric;
1481 		break;
1482 
1483 	case SIOCGIFMTU:
1484 		ifr->ifr_mtu = ifp->if_mtu;
1485 		break;
1486 
1487 	case SIOCGIFDATA:
1488 		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1489 				sizeof(ifp->if_data));
1490 		break;
1491 
1492 	case SIOCGIFPHYS:
1493 		ifr->ifr_phys = ifp->if_physical;
1494 		break;
1495 
1496 	case SIOCGIFPOLLCPU:
1497 #ifdef DEVICE_POLLING
1498 		ifr->ifr_pollcpu = ifp->if_poll_cpuid;
1499 #else
1500 		ifr->ifr_pollcpu = -1;
1501 #endif
1502 		break;
1503 
1504 	case SIOCSIFPOLLCPU:
1505 #ifdef DEVICE_POLLING
1506 		if ((ifp->if_flags & IFF_POLLING) == 0)
1507 			ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
1508 #endif
1509 		break;
1510 
1511 	case SIOCSIFFLAGS:
1512 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1513 		if (error)
1514 			break;
1515 		new_flags = (ifr->ifr_flags & 0xffff) |
1516 		    (ifr->ifr_flagshigh << 16);
1517 		if (ifp->if_flags & IFF_SMART) {
1518 			/* Smart drivers twiddle their own routes */
1519 		} else if (ifp->if_flags & IFF_UP &&
1520 		    (new_flags & IFF_UP) == 0) {
1521 			crit_enter();
1522 			if_down(ifp);
1523 			crit_exit();
1524 		} else if (new_flags & IFF_UP &&
1525 		    (ifp->if_flags & IFF_UP) == 0) {
1526 			crit_enter();
1527 			if_up(ifp);
1528 			crit_exit();
1529 		}
1530 
1531 #ifdef DEVICE_POLLING
1532 		if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
1533 			if (new_flags & IFF_POLLING) {
1534 				ether_poll_register(ifp);
1535 			} else {
1536 				ether_poll_deregister(ifp);
1537 			}
1538 		}
1539 #endif
1540 #ifdef IFPOLL_ENABLE
1541 		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1542 			if (new_flags & IFF_NPOLLING)
1543 				ifpoll_register(ifp);
1544 			else
1545 				ifpoll_deregister(ifp);
1546 		}
1547 #endif
1548 
1549 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1550 			(new_flags &~ IFF_CANTCHANGE);
1551 		if (new_flags & IFF_PPROMISC) {
1552 			/* Permanently promiscuous mode requested */
1553 			ifp->if_flags |= IFF_PROMISC;
1554 		} else if (ifp->if_pcount == 0) {
1555 			ifp->if_flags &= ~IFF_PROMISC;
1556 		}
1557 		if (ifp->if_ioctl) {
1558 			ifnet_serialize_all(ifp);
1559 			ifp->if_ioctl(ifp, cmd, data, cred);
1560 			ifnet_deserialize_all(ifp);
1561 		}
1562 		getmicrotime(&ifp->if_lastchange);
1563 		break;
1564 
1565 	case SIOCSIFCAP:
1566 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1567 		if (error)
1568 			break;
1569 		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1570 			error = EINVAL;
1571 			break;
1572 		}
1573 		ifnet_serialize_all(ifp);
1574 		ifp->if_ioctl(ifp, cmd, data, cred);
1575 		ifnet_deserialize_all(ifp);
1576 		break;
1577 
1578 	case SIOCSIFNAME:
1579 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1580 		if (error)
1581 			break;
1582 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1583 		if (error)
1584 			break;
1585 		if (new_name[0] == '\0') {
1586 			error = EINVAL;
1587 			break;
1588 		}
1589 		if (ifunit(new_name) != NULL) {
1590 			error = EEXIST;
1591 			break;
1592 		}
1593 
1594 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1595 
1596 		/* Announce the departure of the interface. */
1597 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1598 
1599 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1600 		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1601 		/* XXX IFA_LOCK(ifa); */
1602 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1603 		namelen = strlen(new_name);
1604 		onamelen = sdl->sdl_nlen;
1605 		/*
1606 		 * Move the address if needed.  This is safe because we
1607 		 * allocate space for a name of length IFNAMSIZ when we
1608 		 * create this in if_attach().
1609 		 */
1610 		if (namelen != onamelen) {
1611 			bcopy(sdl->sdl_data + onamelen,
1612 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1613 		}
1614 		bcopy(new_name, sdl->sdl_data, namelen);
1615 		sdl->sdl_nlen = namelen;
1616 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1617 		bzero(sdl->sdl_data, onamelen);
1618 		while (namelen != 0)
1619 			sdl->sdl_data[--namelen] = 0xff;
1620 		/* XXX IFA_UNLOCK(ifa) */
1621 
1622 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1623 
1624 		/* Announce the return of the interface. */
1625 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1626 		break;
1627 
1628 	case SIOCSIFMETRIC:
1629 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1630 		if (error)
1631 			break;
1632 		ifp->if_metric = ifr->ifr_metric;
1633 		getmicrotime(&ifp->if_lastchange);
1634 		break;
1635 
1636 	case SIOCSIFPHYS:
1637 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1638 		if (error)
1639 			break;
1640 		if (ifp->if_ioctl == NULL) {
1641 		        error = EOPNOTSUPP;
1642 			break;
1643 		}
1644 		ifnet_serialize_all(ifp);
1645 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1646 		ifnet_deserialize_all(ifp);
1647 		if (error == 0)
1648 			getmicrotime(&ifp->if_lastchange);
1649 		break;
1650 
1651 	case SIOCSIFMTU:
1652 	{
1653 		u_long oldmtu = ifp->if_mtu;
1654 
1655 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1656 		if (error)
1657 			break;
1658 		if (ifp->if_ioctl == NULL) {
1659 			error = EOPNOTSUPP;
1660 			break;
1661 		}
1662 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1663 			error = EINVAL;
1664 			break;
1665 		}
1666 		ifnet_serialize_all(ifp);
1667 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1668 		ifnet_deserialize_all(ifp);
1669 		if (error == 0) {
1670 			getmicrotime(&ifp->if_lastchange);
1671 			rt_ifmsg(ifp);
1672 		}
1673 		/*
1674 		 * If the link MTU changed, do network layer specific procedure.
1675 		 */
1676 		if (ifp->if_mtu != oldmtu) {
1677 #ifdef INET6
1678 			nd6_setmtu(ifp);
1679 #endif
1680 		}
1681 		break;
1682 	}
1683 
1684 	case SIOCADDMULTI:
1685 	case SIOCDELMULTI:
1686 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1687 		if (error)
1688 			break;
1689 
1690 		/* Don't allow group membership on non-multicast interfaces. */
1691 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1692 			error = EOPNOTSUPP;
1693 			break;
1694 		}
1695 
1696 		/* Don't let users screw up protocols' entries. */
1697 		if (ifr->ifr_addr.sa_family != AF_LINK) {
1698 			error = EINVAL;
1699 			break;
1700 		}
1701 
1702 		if (cmd == SIOCADDMULTI) {
1703 			struct ifmultiaddr *ifma;
1704 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1705 		} else {
1706 			error = if_delmulti(ifp, &ifr->ifr_addr);
1707 		}
1708 		if (error == 0)
1709 			getmicrotime(&ifp->if_lastchange);
1710 		break;
1711 
1712 	case SIOCSIFPHYADDR:
1713 	case SIOCDIFPHYADDR:
1714 #ifdef INET6
1715 	case SIOCSIFPHYADDR_IN6:
1716 #endif
1717 	case SIOCSLIFPHYADDR:
1718         case SIOCSIFMEDIA:
1719 	case SIOCSIFGENERIC:
1720 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1721 		if (error)
1722 			break;
1723 		if (ifp->if_ioctl == 0) {
1724 			error = EOPNOTSUPP;
1725 			break;
1726 		}
1727 		ifnet_serialize_all(ifp);
1728 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1729 		ifnet_deserialize_all(ifp);
1730 		if (error == 0)
1731 			getmicrotime(&ifp->if_lastchange);
1732 		break;
1733 
1734 	case SIOCGIFSTATUS:
1735 		ifs = (struct ifstat *)data;
1736 		ifs->ascii[0] = '\0';
1737 		/* fall through */
1738 	case SIOCGIFPSRCADDR:
1739 	case SIOCGIFPDSTADDR:
1740 	case SIOCGLIFPHYADDR:
1741 	case SIOCGIFMEDIA:
1742 	case SIOCGIFGENERIC:
1743 		if (ifp->if_ioctl == NULL) {
1744 			error = EOPNOTSUPP;
1745 			break;
1746 		}
1747 		ifnet_serialize_all(ifp);
1748 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1749 		ifnet_deserialize_all(ifp);
1750 		break;
1751 
1752 	case SIOCSIFLLADDR:
1753 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1754 		if (error)
1755 			break;
1756 		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1757 				     ifr->ifr_addr.sa_len);
1758 		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1759 		break;
1760 
1761 	default:
1762 		oif_flags = ifp->if_flags;
1763 		if (so->so_proto == 0) {
1764 			error = EOPNOTSUPP;
1765 			break;
1766 		}
1767 #ifndef COMPAT_43
1768 		error = so_pru_control_direct(so, cmd, data, ifp);
1769 #else
1770 		ocmd = cmd;
1771 
1772 		switch (cmd) {
1773 		case SIOCSIFDSTADDR:
1774 		case SIOCSIFADDR:
1775 		case SIOCSIFBRDADDR:
1776 		case SIOCSIFNETMASK:
1777 #if BYTE_ORDER != BIG_ENDIAN
1778 			if (ifr->ifr_addr.sa_family == 0 &&
1779 			    ifr->ifr_addr.sa_len < 16) {
1780 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1781 				ifr->ifr_addr.sa_len = 16;
1782 			}
1783 #else
1784 			if (ifr->ifr_addr.sa_len == 0)
1785 				ifr->ifr_addr.sa_len = 16;
1786 #endif
1787 			break;
1788 		case OSIOCGIFADDR:
1789 			cmd = SIOCGIFADDR;
1790 			break;
1791 		case OSIOCGIFDSTADDR:
1792 			cmd = SIOCGIFDSTADDR;
1793 			break;
1794 		case OSIOCGIFBRDADDR:
1795 			cmd = SIOCGIFBRDADDR;
1796 			break;
1797 		case OSIOCGIFNETMASK:
1798 			cmd = SIOCGIFNETMASK;
1799 			break;
1800 		default:
1801 			break;
1802 		}
1803 
1804 		error = so_pru_control_direct(so, cmd, data, ifp);
1805 
1806 		switch (ocmd) {
1807 		case OSIOCGIFADDR:
1808 		case OSIOCGIFDSTADDR:
1809 		case OSIOCGIFBRDADDR:
1810 		case OSIOCGIFNETMASK:
1811 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1812 			break;
1813 		}
1814 #endif /* COMPAT_43 */
1815 
1816 		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1817 #ifdef INET6
1818 			DELAY(100);/* XXX: temporary workaround for fxp issue*/
1819 			if (ifp->if_flags & IFF_UP) {
1820 				crit_enter();
1821 				in6_if_up(ifp);
1822 				crit_exit();
1823 			}
1824 #endif
1825 		}
1826 		break;
1827 	}
1828 
1829 	mtx_unlock(&ifp->if_ioctl_mtx);
1830 	return (error);
1831 }
1832 
1833 /*
1834  * Set/clear promiscuous mode on interface ifp based on the truth value
1835  * of pswitch.  The calls are reference counted so that only the first
1836  * "on" request actually has an effect, as does the final "off" request.
1837  * Results are undefined if the "off" and "on" requests are not matched.
1838  */
1839 int
1840 ifpromisc(struct ifnet *ifp, int pswitch)
1841 {
1842 	struct ifreq ifr;
1843 	int error;
1844 	int oldflags;
1845 
1846 	oldflags = ifp->if_flags;
1847 	if (ifp->if_flags & IFF_PPROMISC) {
1848 		/* Do nothing if device is in permanently promiscuous mode */
1849 		ifp->if_pcount += pswitch ? 1 : -1;
1850 		return (0);
1851 	}
1852 	if (pswitch) {
1853 		/*
1854 		 * If the device is not configured up, we cannot put it in
1855 		 * promiscuous mode.
1856 		 */
1857 		if ((ifp->if_flags & IFF_UP) == 0)
1858 			return (ENETDOWN);
1859 		if (ifp->if_pcount++ != 0)
1860 			return (0);
1861 		ifp->if_flags |= IFF_PROMISC;
1862 		log(LOG_INFO, "%s: promiscuous mode enabled\n",
1863 		    ifp->if_xname);
1864 	} else {
1865 		if (--ifp->if_pcount > 0)
1866 			return (0);
1867 		ifp->if_flags &= ~IFF_PROMISC;
1868 		log(LOG_INFO, "%s: promiscuous mode disabled\n",
1869 		    ifp->if_xname);
1870 	}
1871 	ifr.ifr_flags = ifp->if_flags;
1872 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1873 	ifnet_serialize_all(ifp);
1874 	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1875 	ifnet_deserialize_all(ifp);
1876 	if (error == 0)
1877 		rt_ifmsg(ifp);
1878 	else
1879 		ifp->if_flags = oldflags;
1880 	return error;
1881 }
1882 
1883 /*
1884  * Return interface configuration
1885  * of system.  List may be used
1886  * in later ioctl's (above) to get
1887  * other information.
1888  */
1889 static int
1890 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1891 {
1892 	struct ifconf *ifc = (struct ifconf *)data;
1893 	struct ifnet *ifp;
1894 	struct sockaddr *sa;
1895 	struct ifreq ifr, *ifrp;
1896 	int space = ifc->ifc_len, error = 0;
1897 
1898 	ifrp = ifc->ifc_req;
1899 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1900 		struct ifaddr_container *ifac;
1901 		int addrs;
1902 
1903 		if (space <= sizeof ifr)
1904 			break;
1905 
1906 		/*
1907 		 * Zero the stack declared structure first to prevent
1908 		 * memory disclosure.
1909 		 */
1910 		bzero(&ifr, sizeof(ifr));
1911 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1912 		    >= sizeof(ifr.ifr_name)) {
1913 			error = ENAMETOOLONG;
1914 			break;
1915 		}
1916 
1917 		addrs = 0;
1918 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1919 			struct ifaddr *ifa = ifac->ifa;
1920 
1921 			if (space <= sizeof ifr)
1922 				break;
1923 			sa = ifa->ifa_addr;
1924 			if (cred->cr_prison &&
1925 			    prison_if(cred, sa))
1926 				continue;
1927 			addrs++;
1928 #ifdef COMPAT_43
1929 			if (cmd == OSIOCGIFCONF) {
1930 				struct osockaddr *osa =
1931 					 (struct osockaddr *)&ifr.ifr_addr;
1932 				ifr.ifr_addr = *sa;
1933 				osa->sa_family = sa->sa_family;
1934 				error = copyout(&ifr, ifrp, sizeof ifr);
1935 				ifrp++;
1936 			} else
1937 #endif
1938 			if (sa->sa_len <= sizeof(*sa)) {
1939 				ifr.ifr_addr = *sa;
1940 				error = copyout(&ifr, ifrp, sizeof ifr);
1941 				ifrp++;
1942 			} else {
1943 				if (space < (sizeof ifr) + sa->sa_len -
1944 					    sizeof(*sa))
1945 					break;
1946 				space -= sa->sa_len - sizeof(*sa);
1947 				error = copyout(&ifr, ifrp,
1948 						sizeof ifr.ifr_name);
1949 				if (error == 0)
1950 					error = copyout(sa, &ifrp->ifr_addr,
1951 							sa->sa_len);
1952 				ifrp = (struct ifreq *)
1953 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1954 			}
1955 			if (error)
1956 				break;
1957 			space -= sizeof ifr;
1958 		}
1959 		if (error)
1960 			break;
1961 		if (!addrs) {
1962 			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
1963 			error = copyout(&ifr, ifrp, sizeof ifr);
1964 			if (error)
1965 				break;
1966 			space -= sizeof ifr;
1967 			ifrp++;
1968 		}
1969 	}
1970 	ifc->ifc_len -= space;
1971 	return (error);
1972 }
1973 
1974 /*
1975  * Just like if_promisc(), but for all-multicast-reception mode.
1976  */
1977 int
1978 if_allmulti(struct ifnet *ifp, int onswitch)
1979 {
1980 	int error = 0;
1981 	struct ifreq ifr;
1982 
1983 	crit_enter();
1984 
1985 	if (onswitch) {
1986 		if (ifp->if_amcount++ == 0) {
1987 			ifp->if_flags |= IFF_ALLMULTI;
1988 			ifr.ifr_flags = ifp->if_flags;
1989 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1990 			ifnet_serialize_all(ifp);
1991 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1992 					      NULL);
1993 			ifnet_deserialize_all(ifp);
1994 		}
1995 	} else {
1996 		if (ifp->if_amcount > 1) {
1997 			ifp->if_amcount--;
1998 		} else {
1999 			ifp->if_amcount = 0;
2000 			ifp->if_flags &= ~IFF_ALLMULTI;
2001 			ifr.ifr_flags = ifp->if_flags;
2002 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2003 			ifnet_serialize_all(ifp);
2004 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2005 					      NULL);
2006 			ifnet_deserialize_all(ifp);
2007 		}
2008 	}
2009 
2010 	crit_exit();
2011 
2012 	if (error == 0)
2013 		rt_ifmsg(ifp);
2014 	return error;
2015 }
2016 
2017 /*
2018  * Add a multicast listenership to the interface in question.
2019  * The link layer provides a routine which converts
2020  */
2021 int
2022 if_addmulti(
2023 	struct ifnet *ifp,	/* interface to manipulate */
2024 	struct sockaddr *sa,	/* address to add */
2025 	struct ifmultiaddr **retifma)
2026 {
2027 	struct sockaddr *llsa, *dupsa;
2028 	int error;
2029 	struct ifmultiaddr *ifma;
2030 
2031 	/*
2032 	 * If the matching multicast address already exists
2033 	 * then don't add a new one, just add a reference
2034 	 */
2035 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2036 		if (sa_equal(sa, ifma->ifma_addr)) {
2037 			ifma->ifma_refcount++;
2038 			if (retifma)
2039 				*retifma = ifma;
2040 			return 0;
2041 		}
2042 	}
2043 
2044 	/*
2045 	 * Give the link layer a chance to accept/reject it, and also
2046 	 * find out which AF_LINK address this maps to, if it isn't one
2047 	 * already.
2048 	 */
2049 	if (ifp->if_resolvemulti) {
2050 		ifnet_serialize_all(ifp);
2051 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2052 		ifnet_deserialize_all(ifp);
2053 		if (error)
2054 			return error;
2055 	} else {
2056 		llsa = NULL;
2057 	}
2058 
2059 	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2060 	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2061 	bcopy(sa, dupsa, sa->sa_len);
2062 
2063 	ifma->ifma_addr = dupsa;
2064 	ifma->ifma_lladdr = llsa;
2065 	ifma->ifma_ifp = ifp;
2066 	ifma->ifma_refcount = 1;
2067 	ifma->ifma_protospec = 0;
2068 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2069 
2070 	/*
2071 	 * Some network interfaces can scan the address list at
2072 	 * interrupt time; lock them out.
2073 	 */
2074 	crit_enter();
2075 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2076 	crit_exit();
2077 	if (retifma)
2078 		*retifma = ifma;
2079 
2080 	if (llsa != NULL) {
2081 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2082 			if (sa_equal(ifma->ifma_addr, llsa))
2083 				break;
2084 		}
2085 		if (ifma) {
2086 			ifma->ifma_refcount++;
2087 		} else {
2088 			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2089 			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2090 			bcopy(llsa, dupsa, llsa->sa_len);
2091 			ifma->ifma_addr = dupsa;
2092 			ifma->ifma_ifp = ifp;
2093 			ifma->ifma_refcount = 1;
2094 			crit_enter();
2095 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2096 			crit_exit();
2097 		}
2098 	}
2099 	/*
2100 	 * We are certain we have added something, so call down to the
2101 	 * interface to let them know about it.
2102 	 */
2103 	crit_enter();
2104 	ifnet_serialize_all(ifp);
2105 	if (ifp->if_ioctl)
2106 		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2107 	ifnet_deserialize_all(ifp);
2108 	crit_exit();
2109 
2110 	return 0;
2111 }
2112 
2113 /*
2114  * Remove a reference to a multicast address on this interface.  Yell
2115  * if the request does not match an existing membership.
2116  */
2117 int
2118 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2119 {
2120 	struct ifmultiaddr *ifma;
2121 
2122 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2123 		if (sa_equal(sa, ifma->ifma_addr))
2124 			break;
2125 	if (ifma == NULL)
2126 		return ENOENT;
2127 
2128 	if (ifma->ifma_refcount > 1) {
2129 		ifma->ifma_refcount--;
2130 		return 0;
2131 	}
2132 
2133 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
2134 	sa = ifma->ifma_lladdr;
2135 	crit_enter();
2136 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2137 	/*
2138 	 * Make sure the interface driver is notified
2139 	 * in the case of a link layer mcast group being left.
2140 	 */
2141 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2142 		ifnet_serialize_all(ifp);
2143 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2144 		ifnet_deserialize_all(ifp);
2145 	}
2146 	crit_exit();
2147 	kfree(ifma->ifma_addr, M_IFMADDR);
2148 	kfree(ifma, M_IFMADDR);
2149 	if (sa == NULL)
2150 		return 0;
2151 
2152 	/*
2153 	 * Now look for the link-layer address which corresponds to
2154 	 * this network address.  It had been squirreled away in
2155 	 * ifma->ifma_lladdr for this purpose (so we don't have
2156 	 * to call ifp->if_resolvemulti() again), and we saved that
2157 	 * value in sa above.  If some nasty deleted the
2158 	 * link-layer address out from underneath us, we can deal because
2159 	 * the address we stored was is not the same as the one which was
2160 	 * in the record for the link-layer address.  (So we don't complain
2161 	 * in that case.)
2162 	 */
2163 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2164 		if (sa_equal(sa, ifma->ifma_addr))
2165 			break;
2166 	if (ifma == NULL)
2167 		return 0;
2168 
2169 	if (ifma->ifma_refcount > 1) {
2170 		ifma->ifma_refcount--;
2171 		return 0;
2172 	}
2173 
2174 	crit_enter();
2175 	ifnet_serialize_all(ifp);
2176 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2177 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2178 	ifnet_deserialize_all(ifp);
2179 	crit_exit();
2180 	kfree(ifma->ifma_addr, M_IFMADDR);
2181 	kfree(sa, M_IFMADDR);
2182 	kfree(ifma, M_IFMADDR);
2183 
2184 	return 0;
2185 }
2186 
/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 *
 * Calls if_delmulti() once for each record on the interface's
 * multicast list.  if_delmulti() only drops one reference per call, so
 * a record whose reference count is above one stays on the list.
 *
 * NOTE(review): if_delmulti() may also unlink and free a second
 * record, the link-layer one paired with the address being deleted.
 * If that record happens to be the saved "next" pointer, this loop
 * would continue from freed memory -- confirm and consider restarting
 * from the list head after each deletion.
 */
void
if_delallmulti(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	/* The _MUTABLE variant fetches the successor before the body runs. */
	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti(ifp, ifma->ifma_addr);
}
2200 
2201 
2202 /*
2203  * Set the link layer address on an interface.
2204  *
2205  * At this time we only support certain types of interfaces,
2206  * and we don't allow the length of the address to change.
2207  */
2208 int
2209 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2210 {
2211 	struct sockaddr_dl *sdl;
2212 	struct ifreq ifr;
2213 
2214 	sdl = IF_LLSOCKADDR(ifp);
2215 	if (sdl == NULL)
2216 		return (EINVAL);
2217 	if (len != sdl->sdl_alen)	/* don't allow length to change */
2218 		return (EINVAL);
2219 	switch (ifp->if_type) {
2220 	case IFT_ETHER:			/* these types use struct arpcom */
2221 	case IFT_XETHER:
2222 	case IFT_L2VLAN:
2223 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2224 		bcopy(lladdr, LLADDR(sdl), len);
2225 		break;
2226 	default:
2227 		return (ENODEV);
2228 	}
2229 	/*
2230 	 * If the interface is already up, we need
2231 	 * to re-init it in order to reprogram its
2232 	 * address filter.
2233 	 */
2234 	ifnet_serialize_all(ifp);
2235 	if ((ifp->if_flags & IFF_UP) != 0) {
2236 #ifdef INET
2237 		struct ifaddr_container *ifac;
2238 #endif
2239 
2240 		ifp->if_flags &= ~IFF_UP;
2241 		ifr.ifr_flags = ifp->if_flags;
2242 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2243 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2244 			      NULL);
2245 		ifp->if_flags |= IFF_UP;
2246 		ifr.ifr_flags = ifp->if_flags;
2247 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2248 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2249 				 NULL);
2250 #ifdef INET
2251 		/*
2252 		 * Also send gratuitous ARPs to notify other nodes about
2253 		 * the address change.
2254 		 */
2255 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2256 			struct ifaddr *ifa = ifac->ifa;
2257 
2258 			if (ifa->ifa_addr != NULL &&
2259 			    ifa->ifa_addr->sa_family == AF_INET)
2260 				arp_gratuitous(ifp, ifa);
2261 		}
2262 #endif
2263 	}
2264 	ifnet_deserialize_all(ifp);
2265 	return (0);
2266 }
2267 
2268 struct ifmultiaddr *
2269 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2270 {
2271 	struct ifmultiaddr *ifma;
2272 
2273 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2274 		if (sa_equal(ifma->ifma_addr, sa))
2275 			break;
2276 
2277 	return ifma;
2278 }
2279 
2280 /*
2281  * This function locates the first real ethernet MAC from a network
2282  * card and loads it into node, returning 0 on success or ENOENT if
2283  * no suitable interfaces were found.  It is used by the uuid code to
2284  * generate a unique 6-byte number.
2285  */
2286 int
2287 if_getanyethermac(uint16_t *node, int minlen)
2288 {
2289 	struct ifnet *ifp;
2290 	struct sockaddr_dl *sdl;
2291 
2292 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
2293 		if (ifp->if_type != IFT_ETHER)
2294 			continue;
2295 		sdl = IF_LLSOCKADDR(ifp);
2296 		if (sdl->sdl_alen < minlen)
2297 			continue;
2298 		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2299 		      minlen);
2300 		return(0);
2301 	}
2302 	return (ENOENT);
2303 }
2304 
2305 /*
2306  * The name argument must be a pointer to storage which will last as
2307  * long as the interface does.  For physical devices, the result of
2308  * device_get_name(dev) is a good choice and for pseudo-devices a
2309  * static string works well.
2310  */
2311 void
2312 if_initname(struct ifnet *ifp, const char *name, int unit)
2313 {
2314 	ifp->if_dname = name;
2315 	ifp->if_dunit = unit;
2316 	if (unit != IF_DUNIT_NONE)
2317 		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2318 	else
2319 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2320 }
2321 
2322 int
2323 if_printf(struct ifnet *ifp, const char *fmt, ...)
2324 {
2325 	__va_list ap;
2326 	int retval;
2327 
2328 	retval = kprintf("%s: ", ifp->if_xname);
2329 	__va_start(ap, fmt);
2330 	retval += kvprintf(fmt, ap);
2331 	__va_end(ap);
2332 	return (retval);
2333 }
2334 
2335 struct ifnet *
2336 if_alloc(uint8_t type)
2337 {
2338         struct ifnet *ifp;
2339 	size_t size;
2340 
2341 	/*
2342 	 * XXX temporary hack until arpcom is setup in if_l2com
2343 	 */
2344 	if (type == IFT_ETHER)
2345 		size = sizeof(struct arpcom);
2346 	else
2347 		size = sizeof(struct ifnet);
2348 
2349 	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2350 
2351 	ifp->if_type = type;
2352 
2353 	if (if_com_alloc[type] != NULL) {
2354 		ifp->if_l2com = if_com_alloc[type](type, ifp);
2355 		if (ifp->if_l2com == NULL) {
2356 			kfree(ifp, M_IFNET);
2357 			return (NULL);
2358 		}
2359 	}
2360 	return (ifp);
2361 }
2362 
2363 void
2364 if_free(struct ifnet *ifp)
2365 {
2366 	kfree(ifp, M_IFNET);
2367 }
2368 
2369 void
2370 ifq_set_classic(struct ifaltq *ifq)
2371 {
2372 	ifq->altq_enqueue = ifq_classic_enqueue;
2373 	ifq->altq_dequeue = ifq_classic_dequeue;
2374 	ifq->altq_request = ifq_classic_request;
2375 }
2376 
2377 int
2378 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2379 		    struct altq_pktattr *pa __unused)
2380 {
2381 	logifq(enqueue, ifq);
2382 	if (IF_QFULL(ifq)) {
2383 		m_freem(m);
2384 		return(ENOBUFS);
2385 	} else {
2386 		IF_ENQUEUE(ifq, m);
2387 		return(0);
2388 	}
2389 }
2390 
2391 struct mbuf *
2392 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2393 {
2394 	struct mbuf *m;
2395 
2396 	switch (op) {
2397 	case ALTDQ_POLL:
2398 		IF_POLL(ifq, m);
2399 		break;
2400 	case ALTDQ_REMOVE:
2401 		logifq(dequeue, ifq);
2402 		IF_DEQUEUE(ifq, m);
2403 		break;
2404 	default:
2405 		panic("unsupported ALTQ dequeue op: %d", op);
2406 	}
2407 	KKASSERT(mpolled == NULL || mpolled == m);
2408 	return(m);
2409 }
2410 
2411 int
2412 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2413 {
2414 	switch (req) {
2415 	case ALTRQ_PURGE:
2416 		IF_DRAIN(ifq);
2417 		break;
2418 	default:
2419 		panic("unsupported ALTQ request: %d", req);
2420 	}
2421 	return(0);
2422 }
2423 
2424 int
2425 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2426 {
2427 	struct ifaltq *ifq = &ifp->if_snd;
2428 	int running = 0, error, start = 0;
2429 
2430 	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2431 
2432 	ALTQ_LOCK(ifq);
2433 	error = ifq_enqueue_locked(ifq, m, pa);
2434 	if (error) {
2435 		ALTQ_UNLOCK(ifq);
2436 		return error;
2437 	}
2438 	if (!ifq->altq_started) {
2439 		/*
2440 		 * Hold the interlock of ifnet.if_start
2441 		 */
2442 		ifq->altq_started = 1;
2443 		start = 1;
2444 	}
2445 	ALTQ_UNLOCK(ifq);
2446 
2447 	ifp->if_obytes += m->m_pkthdr.len;
2448 	if (m->m_flags & M_MCAST)
2449 		ifp->if_omcasts++;
2450 
2451 	if (!start) {
2452 		logifstart(avoid, ifp);
2453 		return 0;
2454 	}
2455 
2456 	/*
2457 	 * Try to do direct ifnet.if_start first, if there is
2458 	 * contention on ifnet's serializer, ifnet.if_start will
2459 	 * be scheduled on ifnet's CPU.
2460 	 */
2461 	if (!ifnet_tryserialize_tx(ifp)) {
2462 		/*
2463 		 * ifnet serializer contention happened,
2464 		 * ifnet.if_start is scheduled on ifnet's
2465 		 * CPU, and we keep going.
2466 		 */
2467 		logifstart(contend_sched, ifp);
2468 		if_start_schedule(ifp);
2469 		return 0;
2470 	}
2471 
2472 	if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2473 		logifstart(run, ifp);
2474 		ifp->if_start(ifp);
2475 		if ((ifp->if_flags &
2476 		     (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2477 			running = 1;
2478 	}
2479 
2480 	ifnet_deserialize_tx(ifp);
2481 
2482 	if (if_start_need_schedule(ifq, running)) {
2483 		/*
2484 		 * More data need to be transmitted, ifnet.if_start is
2485 		 * scheduled on ifnet's CPU, and we keep going.
2486 		 * NOTE: ifnet.if_start interlock is not released.
2487 		 */
2488 		logifstart(sched, ifp);
2489 		if_start_schedule(ifp);
2490 	}
2491 	return 0;
2492 }
2493 
2494 void *
2495 ifa_create(int size, int flags)
2496 {
2497 	struct ifaddr *ifa;
2498 	int i;
2499 
2500 	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2501 
2502 	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2503 	if (ifa == NULL)
2504 		return NULL;
2505 
2506 	ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2507 				      M_IFADDR, M_WAITOK | M_ZERO);
2508 	ifa->ifa_ncnt = ncpus;
2509 	for (i = 0; i < ncpus; ++i) {
2510 		struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2511 
2512 		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2513 		ifac->ifa = ifa;
2514 		ifac->ifa_refcnt = 1;
2515 	}
2516 #ifdef IFADDR_DEBUG
2517 	kprintf("alloc ifa %p %d\n", ifa, size);
2518 #endif
2519 	return ifa;
2520 }
2521 
2522 void
2523 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2524 {
2525 	struct ifaddr *ifa = ifac->ifa;
2526 
2527 	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2528 	KKASSERT(ifac->ifa_refcnt == 0);
2529 	KASSERT(ifac->ifa_listmask == 0,
2530 		("ifa is still on %#x lists", ifac->ifa_listmask));
2531 
2532 	ifac->ifa_magic = IFA_CONTAINER_DEAD;
2533 
2534 #ifdef IFADDR_DEBUG_VERBOSE
2535 	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2536 #endif
2537 
2538 	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2539 		("invalid # of ifac, %d", ifa->ifa_ncnt));
2540 	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2541 #ifdef IFADDR_DEBUG
2542 		kprintf("free ifa %p\n", ifa);
2543 #endif
2544 		kfree(ifa->ifa_containers, M_IFADDR);
2545 		kfree(ifa, M_IFADDR);
2546 	}
2547 }
2548 
2549 static void
2550 ifa_iflink_dispatch(netmsg_t nmsg)
2551 {
2552 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2553 	struct ifaddr *ifa = msg->ifa;
2554 	struct ifnet *ifp = msg->ifp;
2555 	int cpu = mycpuid;
2556 	struct ifaddr_container *ifac;
2557 
2558 	crit_enter();
2559 
2560 	ifac = &ifa->ifa_containers[cpu];
2561 	ASSERT_IFAC_VALID(ifac);
2562 	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2563 		("ifaddr is on if_addrheads"));
2564 
2565 	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2566 	if (msg->tail)
2567 		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2568 	else
2569 		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2570 
2571 	crit_exit();
2572 
2573 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2574 }
2575 
2576 void
2577 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2578 {
2579 	struct netmsg_ifaddr msg;
2580 
2581 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2582 		    0, ifa_iflink_dispatch);
2583 	msg.ifa = ifa;
2584 	msg.ifp = ifp;
2585 	msg.tail = tail;
2586 
2587 	ifa_domsg(&msg.base.lmsg, 0);
2588 }
2589 
2590 static void
2591 ifa_ifunlink_dispatch(netmsg_t nmsg)
2592 {
2593 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2594 	struct ifaddr *ifa = msg->ifa;
2595 	struct ifnet *ifp = msg->ifp;
2596 	int cpu = mycpuid;
2597 	struct ifaddr_container *ifac;
2598 
2599 	crit_enter();
2600 
2601 	ifac = &ifa->ifa_containers[cpu];
2602 	ASSERT_IFAC_VALID(ifac);
2603 	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2604 		("ifaddr is not on if_addrhead"));
2605 
2606 	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2607 	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2608 
2609 	crit_exit();
2610 
2611 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2612 }
2613 
2614 void
2615 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2616 {
2617 	struct netmsg_ifaddr msg;
2618 
2619 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2620 		    0, ifa_ifunlink_dispatch);
2621 	msg.ifa = ifa;
2622 	msg.ifp = ifp;
2623 
2624 	ifa_domsg(&msg.base.lmsg, 0);
2625 }
2626 
2627 static void
2628 ifa_destroy_dispatch(netmsg_t nmsg)
2629 {
2630 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2631 
2632 	IFAFREE(msg->ifa);
2633 	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2634 }
2635 
2636 void
2637 ifa_destroy(struct ifaddr *ifa)
2638 {
2639 	struct netmsg_ifaddr msg;
2640 
2641 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2642 		    0, ifa_destroy_dispatch);
2643 	msg.ifa = ifa;
2644 
2645 	ifa_domsg(&msg.base.lmsg, 0);
2646 }
2647 
2648 struct lwkt_port *
2649 ifnet_portfn(int cpu)
2650 {
2651 	return &ifnet_threads[cpu].td_msgport;
2652 }
2653 
2654 void
2655 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2656 {
2657 	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2658 
2659 	if (next_cpu < ncpus)
2660 		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2661 	else
2662 		lwkt_replymsg(lmsg, 0);
2663 }
2664 
2665 int
2666 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2667 {
2668 	KKASSERT(cpu < ncpus);
2669 	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2670 }
2671 
2672 void
2673 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2674 {
2675 	KKASSERT(cpu < ncpus);
2676 	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2677 }
2678 
2679 /*
2680  * Generic netmsg service loop.  Some protocols may roll their own but all
2681  * must do the basic command dispatch function call done here.
2682  */
2683 static void
2684 ifnet_service_loop(void *arg __unused)
2685 {
2686 	netmsg_t msg;
2687 
2688 	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2689 		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2690 		msg->base.nm_dispatch(msg);
2691 	}
2692 }
2693 
2694 static void
2695 ifnetinit(void *dummy __unused)
2696 {
2697 	int i;
2698 
2699 	for (i = 0; i < ncpus; ++i) {
2700 		struct thread *thr = &ifnet_threads[i];
2701 
2702 		lwkt_create(ifnet_service_loop, NULL, NULL,
2703 			    thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2704 			    i, "ifnet %d", i);
2705 		netmsg_service_port_init(&thr->td_msgport);
2706 		lwkt_schedule(thr);
2707 	}
2708 }
2709 
2710 struct ifnet *
2711 ifnet_byindex(unsigned short idx)
2712 {
2713 	if (idx > if_index)
2714 		return NULL;
2715 	return ifindex2ifnet[idx];
2716 }
2717 
2718 struct ifaddr *
2719 ifaddr_byindex(unsigned short idx)
2720 {
2721 	struct ifnet *ifp;
2722 
2723 	ifp = ifnet_byindex(idx);
2724 	if (!ifp)
2725 		return NULL;
2726 	return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2727 }
2728 
2729 void
2730 if_register_com_alloc(u_char type,
2731     if_com_alloc_t *a, if_com_free_t *f)
2732 {
2733 
2734         KASSERT(if_com_alloc[type] == NULL,
2735             ("if_register_com_alloc: %d already registered", type));
2736         KASSERT(if_com_free[type] == NULL,
2737             ("if_register_com_alloc: %d free already registered", type));
2738 
2739         if_com_alloc[type] = a;
2740         if_com_free[type] = f;
2741 }
2742 
2743 void
2744 if_deregister_com_alloc(u_char type)
2745 {
2746 
2747         KASSERT(if_com_alloc[type] != NULL,
2748             ("if_deregister_com_alloc: %d not registered", type));
2749         KASSERT(if_com_free[type] != NULL,
2750             ("if_deregister_com_alloc: %d free not registered", type));
2751         if_com_alloc[type] = NULL;
2752         if_com_free[type] = NULL;
2753 }
2754 
2755 int
2756 if_ring_count2(int cnt, int cnt_max)
2757 {
2758 	int shift = 0;
2759 
2760 	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
2761 	    ("invalid ring count max %d", cnt_max));
2762 
2763 	if (cnt <= 0)
2764 		cnt = cnt_max;
2765 	if (cnt > ncpus2)
2766 		cnt = ncpus2;
2767 	if (cnt > cnt_max)
2768 		cnt = cnt_max;
2769 
2770 	while ((1 << (shift + 1)) <= cnt)
2771 		++shift;
2772 	cnt = 1 << shift;
2773 
2774 	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
2775 	    ("calculate cnt %d, ncpus2 %d, cnt max %d",
2776 	     cnt, ncpus2, cnt_max));
2777 	return cnt;
2778 }
2779