xref: /dragonfly/sys/net/if.c (revision b575ab8a)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  */
36 
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_polling.h"
41 #include "opt_ifpoll.h"
42 
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/priv.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/socketops.h>
53 #include <sys/protosw.h>
54 #include <sys/kernel.h>
55 #include <sys/ktr.h>
56 #include <sys/mutex.h>
57 #include <sys/sockio.h>
58 #include <sys/syslog.h>
59 #include <sys/sysctl.h>
60 #include <sys/domain.h>
61 #include <sys/thread.h>
62 #include <sys/serialize.h>
63 #include <sys/bus.h>
64 
65 #include <sys/thread2.h>
66 #include <sys/msgport2.h>
67 #include <sys/mutex2.h>
68 
69 #include <net/if.h>
70 #include <net/if_arp.h>
71 #include <net/if_dl.h>
72 #include <net/if_types.h>
73 #include <net/if_var.h>
74 #include <net/ifq_var.h>
75 #include <net/radix.h>
76 #include <net/route.h>
77 #include <net/if_clone.h>
78 #include <net/netisr.h>
79 #include <net/netmsg2.h>
80 
81 #include <machine/atomic.h>
82 #include <machine/stdarg.h>
83 #include <machine/smp.h>
84 
85 #if defined(INET) || defined(INET6)
86 /*XXX*/
87 #include <netinet/in.h>
88 #include <netinet/in_var.h>
89 #include <netinet/if_ether.h>
90 #ifdef INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/in6_ifattach.h>
93 #endif
94 #endif
95 
96 #if defined(COMPAT_43)
97 #include <emulation/43bsd/43bsd_socket.h>
98 #endif /* COMPAT_43 */
99 
/*
 * Netmsg carrying an interface address and its interface.
 *
 * NOTE(review): the code that builds and consumes this message is not
 * in this part of the file; the per-field notes are inferred from the
 * field names and should be confirmed against the users.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;
	struct ifaddr	*ifa;	/* presumably the address being operated on */
	struct ifnet	*ifp;	/* presumably the interface owning ifa */
	int		tail;	/* presumably "insert at tail" -- TODO confirm */
};
106 
107 /*
108  * System initialization
109  */
static void	if_attachdomain(void *);	/* SYSINIT hook: walks all ifnets */
static void	if_attachdomain1(struct ifnet *); /* per-ifnet domain attach */
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);			/* SYSINIT hook (SI_SUB_PROTO_IF) */
static void	ifnetinit(void *);		/* SYSINIT hook (SI_SUB_PRE_DRIVERS) */
static void	if_slowtimo(void *);		/* first called from ifinit() */
/* Installed as ifa_rtrequest of the link-level ifaddr in if_attach(). */
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
/* rnh_walktree() callback used by if_detach(). */
static int	if_rtdel(struct radix_node *, void *);
118 
119 #ifdef INET6
120 /*
 * XXX: declared here to avoid including many inet6-related headers.
 * Should this be more generalized?
123  */
124 extern void	nd6_setmtu(struct ifnet *);
125 #endif
126 
/* sysctl nodes: "link" under _net, and "generic" under _net_link. */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)

/*
 * Allocation/free hook tables with 256 slots each.
 * NOTE(review): presumably indexed by interface type; the users are not
 * in this part of the file -- confirm.
 */
static  if_com_alloc_t *if_com_alloc[256];
static  if_com_free_t *if_com_free[256];

/* malloc(9) types used for interface addresses and structures. */
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

/* Send queue length that ifinit() applies when a driver left it at 0. */
int			ifqmaxlen = IFQ_MAXLEN;
/* List of all attached interfaces, linked through if_link. */
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

/* Callout initialized in ifinit(). */
struct callout		if_slowtimo_timer;

/* Highest interface index in use; if_attach() pre-increments it. */
int			if_index = 0;
/* Index -> ifnet table; allocated and grown by doubling in if_attach(). */
struct ifnet		**ifindex2ifnet = NULL;
static struct thread	ifnet_threads[MAXCPU];
149 
/*
 * KTR trace points for the interface send queue ("ifq" master).
 * logifq(name, arg) logs the event ifq_<name>.
 */
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS	struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

/*
 * KTR trace points for the if_start scheduling code ("if_start" master).
 * logifstart(name, arg) logs the event if_start_<name>; the events are
 * run, sched, avoid, contend_sched and chase_sched.
 */
#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)

/* List of all interface groups; maintained by if_creategroup()/if_delgroup(). */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
179 
180 /*
181  * Network interface utility routines.
182  *
183  * Routines with ifa_ifwith* names take sockaddr *'s as
184  * parameters.
185  */
186 /* ARGSUSED*/
187 void
188 ifinit(void *dummy)
189 {
190 	struct ifnet *ifp;
191 
192 	callout_init(&if_slowtimo_timer);
193 
194 	crit_enter();
195 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
196 		if (ifp->if_snd.ifq_maxlen == 0) {
197 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
198 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
199 		}
200 	}
201 	crit_exit();
202 
203 	if_slowtimo(0);
204 }
205 
206 static int
207 if_start_cpuid(struct ifnet *ifp)
208 {
209 	return ifp->if_cpuid;
210 }
211 
212 #ifdef DEVICE_POLLING
213 static int
214 if_start_cpuid_poll(struct ifnet *ifp)
215 {
216 	int poll_cpuid = ifp->if_poll_cpuid;
217 
218 	if (poll_cpuid >= 0)
219 		return poll_cpuid;
220 	else
221 		return ifp->if_cpuid;
222 }
223 #endif
224 
225 #ifdef IFPOLL_ENABLE
226 static int
227 if_start_cpuid_npoll(struct ifnet *ifp)
228 {
229 	int poll_cpuid = ifp->if_npoll_cpuid;
230 
231 	if (poll_cpuid >= 0)
232 		return poll_cpuid;
233 	else
234 		return ifp->if_cpuid;
235 }
236 #endif
237 
238 static void
239 if_start_ipifunc(void *arg)
240 {
241 	struct ifnet *ifp = arg;
242 	struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
243 
244 	crit_enter();
245 	if (lmsg->ms_flags & MSGF_DONE)
246 		lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
247 	crit_exit();
248 }
249 
250 /*
251  * Schedule ifnet.if_start on ifnet's CPU
252  */
253 static void
254 if_start_schedule(struct ifnet *ifp)
255 {
256 	int cpu;
257 
258 	cpu = ifp->if_start_cpuid(ifp);
259 	if (cpu != mycpuid)
260 		lwkt_send_ipiq(globaldata_find(cpu), if_start_ipifunc, ifp);
261 	else
262 	if_start_ipifunc(ifp);
263 }
264 
265 /*
266  * NOTE:
267  * This function will release ifnet.if_start interlock,
268  * if ifnet.if_start does not need to be scheduled
269  */
270 static __inline int
271 if_start_need_schedule(struct ifaltq *ifq, int running)
272 {
273 	if (!running || ifq_is_empty(ifq)
274 #ifdef ALTQ
275 	    || ifq->altq_tbr != NULL
276 #endif
277 	) {
278 		ALTQ_LOCK(ifq);
279 		/*
280 		 * ifnet.if_start interlock is released, if:
281 		 * 1) Hardware can not take any packets, due to
282 		 *    o  interface is marked down
283 		 *    o  hardware queue is full (IFF_OACTIVE)
284 		 *    Under the second situation, hardware interrupt
285 		 *    or polling(4) will call/schedule ifnet.if_start
286 		 *    when hardware queue is ready
287 		 * 2) There is not packet in the ifnet.if_snd.
288 		 *    Further ifq_dispatch or ifq_handoff will call/
289 		 *    schedule ifnet.if_start
290 		 * 3) TBR is used and it does not allow further
291 		 *    dequeueing.
292 		 *    TBR callout will call ifnet.if_start
293 		 */
294 		if (!running || !ifq_data_ready(ifq)) {
295 			ifq->altq_started = 0;
296 			ALTQ_UNLOCK(ifq);
297 			return 0;
298 		}
299 		ALTQ_UNLOCK(ifq);
300 	}
301 	return 1;
302 }
303 
304 static void
305 if_start_dispatch(netmsg_t msg)
306 {
307 	struct lwkt_msg *lmsg = &msg->base.lmsg;
308 	struct ifnet *ifp = lmsg->u.ms_resultp;
309 	struct ifaltq *ifq = &ifp->if_snd;
310 	int running = 0;
311 
312 	crit_enter();
313 	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
314 	crit_exit();
315 
316 	if (mycpuid != ifp->if_start_cpuid(ifp)) {
317 		/*
318 		 * If the ifnet is still up, we need to
319 		 * chase its CPU change.
320 		 */
321 		if (ifp->if_flags & IFF_UP) {
322 			logifstart(chase_sched, ifp);
323 			if_start_schedule(ifp);
324 			return;
325 		} else {
326 			goto check;
327 		}
328 	}
329 
330 	if (ifp->if_flags & IFF_UP) {
331 		ifnet_serialize_tx(ifp); /* XXX try? */
332 		if ((ifp->if_flags & IFF_OACTIVE) == 0) {
333 			logifstart(run, ifp);
334 			ifp->if_start(ifp);
335 			if ((ifp->if_flags &
336 			(IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
337 				running = 1;
338 		}
339 		ifnet_deserialize_tx(ifp);
340 	}
341 check:
342 	if (if_start_need_schedule(ifq, running)) {
343 		crit_enter();
344 		if (lmsg->ms_flags & MSGF_DONE)	{ /* XXX necessary? */
345 			logifstart(sched, ifp);
346 			lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
347 		}
348 		crit_exit();
349 	}
350 }
351 
352 /* Device driver ifnet.if_start helper function */
353 void
354 if_devstart(struct ifnet *ifp)
355 {
356 	struct ifaltq *ifq = &ifp->if_snd;
357 	int running = 0;
358 
359 	ASSERT_IFNET_SERIALIZED_TX(ifp);
360 
361 	ALTQ_LOCK(ifq);
362 	if (ifq->altq_started || !ifq_data_ready(ifq)) {
363 		logifstart(avoid, ifp);
364 		ALTQ_UNLOCK(ifq);
365 		return;
366 	}
367 	ifq->altq_started = 1;
368 	ALTQ_UNLOCK(ifq);
369 
370 	logifstart(run, ifp);
371 	ifp->if_start(ifp);
372 
373 	if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
374 		running = 1;
375 
376 	if (if_start_need_schedule(ifq, running)) {
377 		/*
378 		 * More data need to be transmitted, ifnet.if_start is
379 		 * scheduled on ifnet's CPU, and we keep going.
380 		 * NOTE: ifnet.if_start interlock is not released.
381 		 */
382 		logifstart(sched, ifp);
383 		if_start_schedule(ifp);
384 	}
385 }
386 
387 static void
388 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
389 {
390 	lwkt_serialize_enter(ifp->if_serializer);
391 }
392 
393 static void
394 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
395 {
396 	lwkt_serialize_exit(ifp->if_serializer);
397 }
398 
399 static int
400 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
401 {
402 	return lwkt_serialize_try(ifp->if_serializer);
403 }
404 
405 #ifdef INVARIANTS
406 static void
407 if_default_serialize_assert(struct ifnet *ifp,
408 			    enum ifnet_serialize slz __unused,
409 			    boolean_t serialized)
410 {
411 	if (serialized)
412 		ASSERT_SERIALIZED(ifp->if_serializer);
413 	else
414 		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
415 }
416 #endif
417 
418 /*
419  * Attach an interface to the list of "active" interfaces.
420  *
421  * The serializer is optional.  If non-NULL access to the interface
422  * may be MPSAFE.
423  */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	int i;

	/* Current capacity (in entries) of ifindex2ifnet; doubled on growth. */
	static int if_indexlim = 8;

	/*
	 * Serializer setup.  A driver either supplies all four serialize
	 * methods and no serializer, or none of them, in which case the
	 * default methods operating on ifp->if_serializer are installed.
	 */
	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/* By default if_start is scheduled on CPU 0. */
	ifp->if_start_cpuid = if_start_cpuid;
	ifp->if_cpuid = 0;

#ifdef DEVICE_POLLING
	/* Device is not in polling mode by default */
	ifp->if_poll_cpuid = -1;
	if (ifp->if_poll != NULL)
		ifp->if_start_cpuid = if_start_cpuid_poll;
#endif
#ifdef IFPOLL_ENABLE
	/* Device is not in polling mode by default */
	ifp->if_npoll_cpuid = -1;
	if (ifp->if_npoll != NULL)
		ifp->if_start_cpuid = if_start_cpuid_npoll;
#endif

	/*
	 * One if_start message per CPU, handled by if_start_dispatch().
	 * The interface pointer travels in ms_resultp.
	 */
	ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
				     M_LWKTMSG, M_WAITOK);
	for (i = 0; i < ncpus; ++i) {
		netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
			    0, if_start_dispatch);
		ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
	}

	/* The ioctl mutex is held until the end of this function. */
	mtx_init(&ifp->if_ioctl_mtx);
	mtx_lock(&ifp->if_ioctl_mtx);

	/* Link into the global list; interface indices start at 1. */
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	ifp->if_index = ++if_index;

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	/* Allocate the index table on first use, or grow it when full. */
	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		if_indexlim <<= 1;

		/* grow ifindex2ifnet */
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet) {
			/* n/2 is the byte size of the old table. */
			bcopy(ifindex2ifnet, q, n/2);
			kfree(ifindex2ifnet, M_IFADDR);
		}
		ifindex2ifnet = q;
	}

	ifindex2ifnet[if_index] = ifp;

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	/* The netmask covers the sockaddr_dl header plus the name bytes. */
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* Round up to the next multiple of sizeof(long). */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP
	/* The ifaddr is followed by two sockaddr_dl's: address and netmask. */
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Second sockaddr_dl: netmask with 0xff over the name bytes. */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Reset the send queue's ALTQ state and select the classic queue. */
	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	/* Keep only the ALTQF_CANTCHANGE bits. */
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;
	ifq->altq_started = 0;
	ifq->altq_prepended = NULL;
	ALTQ_LOCK_INIT(ifq);
	ifq_set_classic(ifq);

	/* Attach to the domains now if any are already registered. */
	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	mtx_unlock(&ifp->if_ioctl_mtx);
}
594 
595 static void
596 if_attachdomain(void *dummy)
597 {
598 	struct ifnet *ifp;
599 
600 	crit_enter();
601 	TAILQ_FOREACH(ifp, &ifnet, if_list)
602 		if_attachdomain1(ifp);
603 	crit_exit();
604 }
605 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
606 	if_attachdomain, NULL);
607 
608 static void
609 if_attachdomain1(struct ifnet *ifp)
610 {
611 	struct domain *dp;
612 
613 	crit_enter();
614 
615 	/* address family dependent data region */
616 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
617 	SLIST_FOREACH(dp, &domains, dom_next)
618 		if (dp->dom_ifattach)
619 			ifp->if_afdata[dp->dom_family] =
620 				(*dp->dom_ifattach)(ifp);
621 	crit_exit();
622 }
623 
624 /*
625  * Purge all addresses whose type is _not_ AF_LINK
626  */
627 void
628 if_purgeaddrs_nolink(struct ifnet *ifp)
629 {
630 	struct ifaddr_container *ifac, *next;
631 
632 	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
633 			      ifa_link, next) {
634 		struct ifaddr *ifa = ifac->ifa;
635 
636 		/* Leave link ifaddr as it is */
637 		if (ifa->ifa_addr->sa_family == AF_LINK)
638 			continue;
639 #ifdef INET
640 		/* XXX: Ugly!! ad hoc just for INET */
641 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
642 			struct ifaliasreq ifr;
643 #ifdef IFADDR_DEBUG_VERBOSE
644 			int i;
645 
646 			kprintf("purge in4 addr %p: ", ifa);
647 			for (i = 0; i < ncpus; ++i)
648 				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
649 			kprintf("\n");
650 #endif
651 
652 			bzero(&ifr, sizeof ifr);
653 			ifr.ifra_addr = *ifa->ifa_addr;
654 			if (ifa->ifa_dstaddr)
655 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
656 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
657 				       NULL) == 0)
658 				continue;
659 		}
660 #endif /* INET */
661 #ifdef INET6
662 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
663 #ifdef IFADDR_DEBUG_VERBOSE
664 			int i;
665 
666 			kprintf("purge in6 addr %p: ", ifa);
667 			for (i = 0; i < ncpus; ++i)
668 				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
669 			kprintf("\n");
670 #endif
671 
672 			in6_purgeaddr(ifa);
673 			/* ifp_addrhead is already updated */
674 			continue;
675 		}
676 #endif /* INET6 */
677 		ifa_ifunlink(ifa, ifp);
678 		ifa_destroy(ifa);
679 	}
680 }
681 
682 /*
683  * Detach an interface, removing it from the
684  * list of "active" interfaces.
685  */
void
if_detach(struct ifnet *ifp)
{
	struct radix_node_head	*rnh;
	int i;
	int cpu, origcpu;
	struct domain *dp;

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/*
	 * Remove routes and flush queues.
	 * The critical section entered here lasts until the end of the
	 * function.
	 */
	crit_enter();
#ifdef DEVICE_POLLING
	if (ifp->if_flags & IFF_POLLING)
		ether_poll_deregister(ifp);
#endif
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	/*
	 * After the non-link addresses are purged, at most the single
	 * link-level ifaddr may remain on this CPU's list; remove it too.
	 */
	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface.
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 *
	 * Each CPU has its own set of routing tables; migrate to the CPU
	 * before walking its tables, then return to the original CPU.
	 */
	origcpu = mycpuid;
	for (cpu = 0; cpu < ncpus; cpu++) {
		lwkt_migratecpu(cpu);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[cpu][i]) == NULL)
				continue;
			rnh->rnh_walktree(rnh, if_rtdel, ifp);
		}
	}
	lwkt_migratecpu(origcpu);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/* Let each domain release the per-family data it attached. */
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);

	/*
	 * Remove interface from ifindex2ifnet[] and maybe decrement if_index:
	 * if_index is lowered past any trailing empty slots.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/* Unlink from the global list and free what if_attach() allocated. */
	TAILQ_REMOVE(&ifnet, ifp, if_link);
	kfree(ifp->if_addrheads, M_IFADDR);
	kfree(ifp->if_start_nmsg, M_LWKTMSG);
	crit_exit();
}
791 
792 /*
793  * Create interface group without members
794  */
795 struct ifg_group *
796 if_creategroup(const char *groupname)
797 {
798         struct ifg_group        *ifg = NULL;
799 
800         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
801             M_TEMP, M_NOWAIT)) == NULL)
802                 return (NULL);
803 
804         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
805         ifg->ifg_refcnt = 0;
806         ifg->ifg_carp_demoted = 0;
807         TAILQ_INIT(&ifg->ifg_members);
808 #if NPF > 0
809         pfi_attach_ifgroup(ifg);
810 #endif
811         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
812 
813         return (ifg);
814 }
815 
816 /*
817  * Add a group to an interface
818  */
819 int
820 if_addgroup(struct ifnet *ifp, const char *groupname)
821 {
822 	struct ifg_list		*ifgl;
823 	struct ifg_group	*ifg = NULL;
824 	struct ifg_member	*ifgm;
825 
826 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
827 	    groupname[strlen(groupname) - 1] <= '9')
828 		return (EINVAL);
829 
830 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
831 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
832 			return (EEXIST);
833 
834 	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
835 		return (ENOMEM);
836 
837 	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
838 		kfree(ifgl, M_TEMP);
839 		return (ENOMEM);
840 	}
841 
842 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
843 		if (!strcmp(ifg->ifg_group, groupname))
844 			break;
845 
846 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
847 		kfree(ifgl, M_TEMP);
848 		kfree(ifgm, M_TEMP);
849 		return (ENOMEM);
850 	}
851 
852 	ifg->ifg_refcnt++;
853 	ifgl->ifgl_group = ifg;
854 	ifgm->ifgm_ifp = ifp;
855 
856 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
857 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
858 
859 #if NPF > 0
860 	pfi_group_change(groupname);
861 #endif
862 
863 	return (0);
864 }
865 
866 /*
867  * Remove a group from an interface
868  */
869 int
870 if_delgroup(struct ifnet *ifp, const char *groupname)
871 {
872 	struct ifg_list		*ifgl;
873 	struct ifg_member	*ifgm;
874 
875 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
876 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
877 			break;
878 	if (ifgl == NULL)
879 		return (ENOENT);
880 
881 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
882 
883 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
884 		if (ifgm->ifgm_ifp == ifp)
885 			break;
886 
887 	if (ifgm != NULL) {
888 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
889 		kfree(ifgm, M_TEMP);
890 	}
891 
892 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
893 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
894 #if NPF > 0
895 		pfi_detach_ifgroup(ifgl->ifgl_group);
896 #endif
897 		kfree(ifgl->ifgl_group, M_TEMP);
898 	}
899 
900 	kfree(ifgl, M_TEMP);
901 
902 #if NPF > 0
903 	pfi_group_change(groupname);
904 #endif
905 
906 	return (0);
907 }
908 
909 /*
910  * Stores all groups from an interface in memory pointed
911  * to by data
912  */
913 int
914 if_getgroup(caddr_t data, struct ifnet *ifp)
915 {
916 	int			 len, error;
917 	struct ifg_list		*ifgl;
918 	struct ifg_req		 ifgrq, *ifgp;
919 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
920 
921 	if (ifgr->ifgr_len == 0) {
922 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
923 			ifgr->ifgr_len += sizeof(struct ifg_req);
924 		return (0);
925 	}
926 
927 	len = ifgr->ifgr_len;
928 	ifgp = ifgr->ifgr_groups;
929 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
930 		if (len < sizeof(ifgrq))
931 			return (EINVAL);
932 		bzero(&ifgrq, sizeof ifgrq);
933 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
934 		    sizeof(ifgrq.ifgrq_group));
935 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
936 		    sizeof(struct ifg_req))))
937 			return (error);
938 		len -= sizeof(ifgrq);
939 		ifgp++;
940 	}
941 
942 	return (0);
943 }
944 
945 /*
946  * Stores all members of a group in memory pointed to by data
947  */
948 int
949 if_getgroupmembers(caddr_t data)
950 {
951 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
952 	struct ifg_group	*ifg;
953 	struct ifg_member	*ifgm;
954 	struct ifg_req		 ifgrq, *ifgp;
955 	int			 len, error;
956 
957 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
958 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
959 			break;
960 	if (ifg == NULL)
961 		return (ENOENT);
962 
963 	if (ifgr->ifgr_len == 0) {
964 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
965 			ifgr->ifgr_len += sizeof(ifgrq);
966 		return (0);
967 	}
968 
969 	len = ifgr->ifgr_len;
970 	ifgp = ifgr->ifgr_groups;
971 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
972 		if (len < sizeof(ifgrq))
973 			return (EINVAL);
974 		bzero(&ifgrq, sizeof ifgrq);
975 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
976 		    sizeof(ifgrq.ifgrq_member));
977 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
978 		    sizeof(struct ifg_req))))
979 			return (error);
980 		len -= sizeof(ifgrq);
981 		ifgp++;
982 	}
983 
984 	return (0);
985 }
986 
987 /*
988  * Delete Routes for a Network Interface
989  *
990  * Called for each routing entry via the rnh->rnh_walktree() call above
991  * to delete all route entries referencing a detaching network interface.
992  *
993  * Arguments:
994  *	rn	pointer to node in the routing table
995  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
996  *
 * Returns:
 *	0	always; a failed route deletion is only logged, so the
 *		tree walk is never aborted
1000  *
1001  */
1002 static int
1003 if_rtdel(struct radix_node *rn, void *arg)
1004 {
1005 	struct rtentry	*rt = (struct rtentry *)rn;
1006 	struct ifnet	*ifp = arg;
1007 	int		err;
1008 
1009 	if (rt->rt_ifp == ifp) {
1010 
1011 		/*
1012 		 * Protect (sorta) against walktree recursion problems
1013 		 * with cloned routes
1014 		 */
1015 		if (!(rt->rt_flags & RTF_UP))
1016 			return (0);
1017 
1018 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1019 				rt_mask(rt), rt->rt_flags,
1020 				NULL);
1021 		if (err) {
1022 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
1023 		}
1024 	}
1025 
1026 	return (0);
1027 }
1028 
1029 /*
1030  * Locate an interface based on a complete address.
1031  */
1032 struct ifaddr *
1033 ifa_ifwithaddr(struct sockaddr *addr)
1034 {
1035 	struct ifnet *ifp;
1036 
1037 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1038 		struct ifaddr_container *ifac;
1039 
1040 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1041 			struct ifaddr *ifa = ifac->ifa;
1042 
1043 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1044 				continue;
1045 			if (sa_equal(addr, ifa->ifa_addr))
1046 				return (ifa);
1047 			if ((ifp->if_flags & IFF_BROADCAST) &&
1048 			    ifa->ifa_broadaddr &&
1049 			    /* IPv6 doesn't have broadcast */
1050 			    ifa->ifa_broadaddr->sa_len != 0 &&
1051 			    sa_equal(ifa->ifa_broadaddr, addr))
1052 				return (ifa);
1053 		}
1054 	}
1055 	return (NULL);
1056 }
1057 /*
1058  * Locate the point to point interface with a given destination address.
1059  */
1060 struct ifaddr *
1061 ifa_ifwithdstaddr(struct sockaddr *addr)
1062 {
1063 	struct ifnet *ifp;
1064 
1065 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1066 		struct ifaddr_container *ifac;
1067 
1068 		if (!(ifp->if_flags & IFF_POINTOPOINT))
1069 			continue;
1070 
1071 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1072 			struct ifaddr *ifa = ifac->ifa;
1073 
1074 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1075 				continue;
1076 			if (ifa->ifa_dstaddr &&
1077 			    sa_equal(addr, ifa->ifa_dstaddr))
1078 				return (ifa);
1079 		}
1080 	}
1081 	return (NULL);
1082 }
1083 
1084 /*
1085  * Find an interface on a specific network.  If many, choice
1086  * is most specific found.
1087  */
1088 struct ifaddr *
1089 ifa_ifwithnet(struct sockaddr *addr)
1090 {
1091 	struct ifnet *ifp;
1092 	struct ifaddr *ifa_maybe = NULL;
1093 	u_int af = addr->sa_family;
1094 	char *addr_data = addr->sa_data, *cplim;
1095 
1096 	/*
1097 	 * AF_LINK addresses can be looked up directly by their index number,
1098 	 * so do that if we can.
1099 	 */
1100 	if (af == AF_LINK) {
1101 		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1102 
1103 		if (sdl->sdl_index && sdl->sdl_index <= if_index)
1104 			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1105 	}
1106 
1107 	/*
1108 	 * Scan though each interface, looking for ones that have
1109 	 * addresses in this address family.
1110 	 */
1111 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1112 		struct ifaddr_container *ifac;
1113 
1114 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1115 			struct ifaddr *ifa = ifac->ifa;
1116 			char *cp, *cp2, *cp3;
1117 
1118 			if (ifa->ifa_addr->sa_family != af)
1119 next:				continue;
1120 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1121 				/*
1122 				 * This is a bit broken as it doesn't
1123 				 * take into account that the remote end may
1124 				 * be a single node in the network we are
1125 				 * looking for.
1126 				 * The trouble is that we don't know the
1127 				 * netmask for the remote end.
1128 				 */
1129 				if (ifa->ifa_dstaddr != NULL &&
1130 				    sa_equal(addr, ifa->ifa_dstaddr))
1131 					return (ifa);
1132 			} else {
1133 				/*
1134 				 * if we have a special address handler,
1135 				 * then use it instead of the generic one.
1136 				 */
1137 				if (ifa->ifa_claim_addr) {
1138 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1139 						return (ifa);
1140 					} else {
1141 						continue;
1142 					}
1143 				}
1144 
1145 				/*
1146 				 * Scan all the bits in the ifa's address.
1147 				 * If a bit dissagrees with what we are
1148 				 * looking for, mask it with the netmask
1149 				 * to see if it really matters.
1150 				 * (A byte at a time)
1151 				 */
1152 				if (ifa->ifa_netmask == 0)
1153 					continue;
1154 				cp = addr_data;
1155 				cp2 = ifa->ifa_addr->sa_data;
1156 				cp3 = ifa->ifa_netmask->sa_data;
1157 				cplim = ifa->ifa_netmask->sa_len +
1158 					(char *)ifa->ifa_netmask;
1159 				while (cp3 < cplim)
1160 					if ((*cp++ ^ *cp2++) & *cp3++)
1161 						goto next; /* next address! */
1162 				/*
1163 				 * If the netmask of what we just found
1164 				 * is more specific than what we had before
1165 				 * (if we had one) then remember the new one
1166 				 * before continuing to search
1167 				 * for an even better one.
1168 				 */
1169 				if (ifa_maybe == NULL ||
1170 				    rn_refines((char *)ifa->ifa_netmask,
1171 					       (char *)ifa_maybe->ifa_netmask))
1172 					ifa_maybe = ifa;
1173 			}
1174 		}
1175 	}
1176 	return (ifa_maybe);
1177 }
1178 
1179 /*
1180  * Find an interface address specific to an interface best matching
1181  * a given address.
1182  */
1183 struct ifaddr *
1184 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1185 {
1186 	struct ifaddr_container *ifac;
1187 	char *cp, *cp2, *cp3;
1188 	char *cplim;
1189 	struct ifaddr *ifa_maybe = NULL;
1190 	u_int af = addr->sa_family;
1191 
1192 	if (af >= AF_MAX)
1193 		return (0);
1194 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1195 		struct ifaddr *ifa = ifac->ifa;
1196 
1197 		if (ifa->ifa_addr->sa_family != af)
1198 			continue;
1199 		if (ifa_maybe == NULL)
1200 			ifa_maybe = ifa;
1201 		if (ifa->ifa_netmask == NULL) {
1202 			if (sa_equal(addr, ifa->ifa_addr) ||
1203 			    (ifa->ifa_dstaddr != NULL &&
1204 			     sa_equal(addr, ifa->ifa_dstaddr)))
1205 				return (ifa);
1206 			continue;
1207 		}
1208 		if (ifp->if_flags & IFF_POINTOPOINT) {
1209 			if (sa_equal(addr, ifa->ifa_dstaddr))
1210 				return (ifa);
1211 		} else {
1212 			cp = addr->sa_data;
1213 			cp2 = ifa->ifa_addr->sa_data;
1214 			cp3 = ifa->ifa_netmask->sa_data;
1215 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1216 			for (; cp3 < cplim; cp3++)
1217 				if ((*cp++ ^ *cp2++) & *cp3)
1218 					break;
1219 			if (cp3 == cplim)
1220 				return (ifa);
1221 		}
1222 	}
1223 	return (ifa_maybe);
1224 }
1225 
1226 /*
1227  * Default action when installing a route with a Link Level gateway.
1228  * Lookup an appropriate real ifa to point to.
1229  * This should be moved to /sys/net/link.c eventually.
1230  */
1231 static void
1232 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1233 {
1234 	struct ifaddr *ifa;
1235 	struct sockaddr *dst;
1236 	struct ifnet *ifp;
1237 
1238 	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1239 	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1240 		return;
1241 	ifa = ifaof_ifpforaddr(dst, ifp);
1242 	if (ifa != NULL) {
1243 		IFAFREE(rt->rt_ifa);
1244 		IFAREF(ifa);
1245 		rt->rt_ifa = ifa;
1246 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1247 			ifa->ifa_rtrequest(cmd, rt, info);
1248 	}
1249 }
1250 
1251 /*
1252  * Mark an interface down and notify protocols of
1253  * the transition.
1254  * NOTE: must be called at splnet or eqivalent.
1255  */
1256 void
1257 if_unroute(struct ifnet *ifp, int flag, int fam)
1258 {
1259 	struct ifaddr_container *ifac;
1260 
1261 	ifp->if_flags &= ~flag;
1262 	getmicrotime(&ifp->if_lastchange);
1263 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1264 		struct ifaddr *ifa = ifac->ifa;
1265 
1266 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1267 			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1268 	}
1269 	ifq_purge(&ifp->if_snd);
1270 	rt_ifmsg(ifp);
1271 }
1272 
1273 /*
1274  * Mark an interface up and notify protocols of
1275  * the transition.
1276  * NOTE: must be called at splnet or eqivalent.
1277  */
1278 void
1279 if_route(struct ifnet *ifp, int flag, int fam)
1280 {
1281 	struct ifaddr_container *ifac;
1282 
1283 	ifq_purge(&ifp->if_snd);
1284 	ifp->if_flags |= flag;
1285 	getmicrotime(&ifp->if_lastchange);
1286 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1287 		struct ifaddr *ifa = ifac->ifa;
1288 
1289 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1290 			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1291 	}
1292 	rt_ifmsg(ifp);
1293 #ifdef INET6
1294 	in6_if_up(ifp);
1295 #endif
1296 }
1297 
1298 /*
1299  * Mark an interface down and notify protocols of the transition.  An
1300  * interface going down is also considered to be a synchronizing event.
1301  * We must ensure that all packet processing related to the interface
1302  * has completed before we return so e.g. the caller can free the ifnet
1303  * structure that the mbufs may be referencing.
1304  *
1305  * NOTE: must be called at splnet or eqivalent.
1306  */
1307 void
1308 if_down(struct ifnet *ifp)
1309 {
1310 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1311 	netmsg_service_sync();
1312 }
1313 
1314 /*
1315  * Mark an interface up and notify protocols of
1316  * the transition.
1317  * NOTE: must be called at splnet or eqivalent.
1318  */
1319 void
1320 if_up(struct ifnet *ifp)
1321 {
1322 	if_route(ifp, IFF_UP, AF_UNSPEC);
1323 }
1324 
1325 /*
1326  * Process a link state change.
1327  * NOTE: must be called at splsoftnet or equivalent.
1328  */
1329 void
1330 if_link_state_change(struct ifnet *ifp)
1331 {
1332 	int link_state = ifp->if_link_state;
1333 
1334 	rt_ifmsg(ifp);
1335 	devctl_notify("IFNET", ifp->if_xname,
1336 	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1337 }
1338 
1339 /*
1340  * Handle interface watchdog timer routines.  Called
1341  * from softclock, we decrement timers (if set) and
1342  * call the appropriate interface routine on expiration.
1343  */
1344 static void
1345 if_slowtimo(void *arg)
1346 {
1347 	struct ifnet *ifp;
1348 
1349 	crit_enter();
1350 
1351 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1352 		if (ifp->if_timer == 0 || --ifp->if_timer)
1353 			continue;
1354 		if (ifp->if_watchdog) {
1355 			if (ifnet_tryserialize_all(ifp)) {
1356 				(*ifp->if_watchdog)(ifp);
1357 				ifnet_deserialize_all(ifp);
1358 			} else {
1359 				/* try again next timeout */
1360 				++ifp->if_timer;
1361 			}
1362 		}
1363 	}
1364 
1365 	crit_exit();
1366 
1367 	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1368 }
1369 
1370 /*
1371  * Map interface name to
1372  * interface structure pointer.
1373  */
1374 struct ifnet *
1375 ifunit(const char *name)
1376 {
1377 	struct ifnet *ifp;
1378 
1379 	/*
1380 	 * Search all the interfaces for this name/number
1381 	 */
1382 
1383 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1384 		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1385 			break;
1386 	}
1387 	return (ifp);
1388 }
1389 
1390 
1391 /*
1392  * Map interface name in a sockaddr_dl to
1393  * interface structure pointer.
1394  */
1395 struct ifnet *
1396 if_withname(struct sockaddr *sa)
1397 {
1398 	char ifname[IFNAMSIZ+1];
1399 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1400 
1401 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1402 	     (sdl->sdl_nlen > IFNAMSIZ) )
1403 		return NULL;
1404 
1405 	/*
1406 	 * ifunit wants a null-terminated name.  It may not be null-terminated
1407 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1408 	 * and there might not be room to put the trailing null anyway, so we
1409 	 * make a local copy that we know we can null terminate safely.
1410 	 */
1411 
1412 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1413 	ifname[sdl->sdl_nlen] = '\0';
1414 	return ifunit(ifname);
1415 }
1416 
1417 
1418 /*
1419  * Interface ioctls.
1420  */
1421 int
1422 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1423 {
1424 	struct ifnet *ifp;
1425 	struct ifreq *ifr;
1426 	struct ifstat *ifs;
1427 	int error;
1428 	short oif_flags;
1429 	int new_flags;
1430 #ifdef COMPAT_43
1431 	int ocmd;
1432 #endif
1433 	size_t namelen, onamelen;
1434 	char new_name[IFNAMSIZ];
1435 	struct ifaddr *ifa;
1436 	struct sockaddr_dl *sdl;
1437 
1438 	switch (cmd) {
1439 	case SIOCGIFCONF:
1440 	case OSIOCGIFCONF:
1441 		return (ifconf(cmd, data, cred));
1442 	default:
1443 		break;
1444 	}
1445 
1446 	ifr = (struct ifreq *)data;
1447 
1448 	switch (cmd) {
1449 	case SIOCIFCREATE:
1450 	case SIOCIFCREATE2:
1451 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1452 			return (error);
1453 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1454 		    	cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1455 	case SIOCIFDESTROY:
1456 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1457 			return (error);
1458 		return (if_clone_destroy(ifr->ifr_name));
1459 	case SIOCIFGCLONERS:
1460 		return (if_clone_list((struct if_clonereq *)data));
1461 	default:
1462 		break;
1463 	}
1464 
1465 	/*
1466 	 * Nominal ioctl through interface, lookup the ifp and obtain a
1467 	 * lock to serialize the ifconfig ioctl operation.
1468 	 */
1469 	ifp = ifunit(ifr->ifr_name);
1470 	if (ifp == NULL)
1471 		return (ENXIO);
1472 	error = 0;
1473 	mtx_lock(&ifp->if_ioctl_mtx);
1474 
1475 	switch (cmd) {
1476 	case SIOCGIFINDEX:
1477 		ifr->ifr_index = ifp->if_index;
1478 		break;
1479 
1480 	case SIOCGIFFLAGS:
1481 		ifr->ifr_flags = ifp->if_flags;
1482 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1483 		break;
1484 
1485 	case SIOCGIFCAP:
1486 		ifr->ifr_reqcap = ifp->if_capabilities;
1487 		ifr->ifr_curcap = ifp->if_capenable;
1488 		break;
1489 
1490 	case SIOCGIFMETRIC:
1491 		ifr->ifr_metric = ifp->if_metric;
1492 		break;
1493 
1494 	case SIOCGIFMTU:
1495 		ifr->ifr_mtu = ifp->if_mtu;
1496 		break;
1497 
1498 	case SIOCGIFDATA:
1499 		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1500 				sizeof(ifp->if_data));
1501 		break;
1502 
1503 	case SIOCGIFPHYS:
1504 		ifr->ifr_phys = ifp->if_physical;
1505 		break;
1506 
1507 	case SIOCGIFPOLLCPU:
1508 #ifdef DEVICE_POLLING
1509 		ifr->ifr_pollcpu = ifp->if_poll_cpuid;
1510 #else
1511 		ifr->ifr_pollcpu = -1;
1512 #endif
1513 		break;
1514 
1515 	case SIOCSIFPOLLCPU:
1516 #ifdef DEVICE_POLLING
1517 		if ((ifp->if_flags & IFF_POLLING) == 0)
1518 			ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
1519 #endif
1520 		break;
1521 
1522 	case SIOCSIFFLAGS:
1523 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1524 		if (error)
1525 			break;
1526 		new_flags = (ifr->ifr_flags & 0xffff) |
1527 		    (ifr->ifr_flagshigh << 16);
1528 		if (ifp->if_flags & IFF_SMART) {
1529 			/* Smart drivers twiddle their own routes */
1530 		} else if (ifp->if_flags & IFF_UP &&
1531 		    (new_flags & IFF_UP) == 0) {
1532 			crit_enter();
1533 			if_down(ifp);
1534 			crit_exit();
1535 		} else if (new_flags & IFF_UP &&
1536 		    (ifp->if_flags & IFF_UP) == 0) {
1537 			crit_enter();
1538 			if_up(ifp);
1539 			crit_exit();
1540 		}
1541 
1542 #ifdef DEVICE_POLLING
1543 		if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
1544 			if (new_flags & IFF_POLLING) {
1545 				ether_poll_register(ifp);
1546 			} else {
1547 				ether_poll_deregister(ifp);
1548 			}
1549 		}
1550 #endif
1551 #ifdef IFPOLL_ENABLE
1552 		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1553 			if (new_flags & IFF_NPOLLING)
1554 				ifpoll_register(ifp);
1555 			else
1556 				ifpoll_deregister(ifp);
1557 		}
1558 #endif
1559 
1560 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1561 			(new_flags &~ IFF_CANTCHANGE);
1562 		if (new_flags & IFF_PPROMISC) {
1563 			/* Permanently promiscuous mode requested */
1564 			ifp->if_flags |= IFF_PROMISC;
1565 		} else if (ifp->if_pcount == 0) {
1566 			ifp->if_flags &= ~IFF_PROMISC;
1567 		}
1568 		if (ifp->if_ioctl) {
1569 			ifnet_serialize_all(ifp);
1570 			ifp->if_ioctl(ifp, cmd, data, cred);
1571 			ifnet_deserialize_all(ifp);
1572 		}
1573 		getmicrotime(&ifp->if_lastchange);
1574 		break;
1575 
1576 	case SIOCSIFCAP:
1577 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1578 		if (error)
1579 			break;
1580 		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1581 			error = EINVAL;
1582 			break;
1583 		}
1584 		ifnet_serialize_all(ifp);
1585 		ifp->if_ioctl(ifp, cmd, data, cred);
1586 		ifnet_deserialize_all(ifp);
1587 		break;
1588 
1589 	case SIOCSIFNAME:
1590 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1591 		if (error)
1592 			break;
1593 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1594 		if (error)
1595 			break;
1596 		if (new_name[0] == '\0') {
1597 			error = EINVAL;
1598 			break;
1599 		}
1600 		if (ifunit(new_name) != NULL) {
1601 			error = EEXIST;
1602 			break;
1603 		}
1604 
1605 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1606 
1607 		/* Announce the departure of the interface. */
1608 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1609 
1610 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1611 		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1612 		/* XXX IFA_LOCK(ifa); */
1613 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1614 		namelen = strlen(new_name);
1615 		onamelen = sdl->sdl_nlen;
1616 		/*
1617 		 * Move the address if needed.  This is safe because we
1618 		 * allocate space for a name of length IFNAMSIZ when we
1619 		 * create this in if_attach().
1620 		 */
1621 		if (namelen != onamelen) {
1622 			bcopy(sdl->sdl_data + onamelen,
1623 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1624 		}
1625 		bcopy(new_name, sdl->sdl_data, namelen);
1626 		sdl->sdl_nlen = namelen;
1627 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1628 		bzero(sdl->sdl_data, onamelen);
1629 		while (namelen != 0)
1630 			sdl->sdl_data[--namelen] = 0xff;
1631 		/* XXX IFA_UNLOCK(ifa) */
1632 
1633 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1634 
1635 		/* Announce the return of the interface. */
1636 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1637 		break;
1638 
1639 	case SIOCSIFMETRIC:
1640 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1641 		if (error)
1642 			break;
1643 		ifp->if_metric = ifr->ifr_metric;
1644 		getmicrotime(&ifp->if_lastchange);
1645 		break;
1646 
1647 	case SIOCSIFPHYS:
1648 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1649 		if (error)
1650 			break;
1651 		if (ifp->if_ioctl == NULL) {
1652 		        error = EOPNOTSUPP;
1653 			break;
1654 		}
1655 		ifnet_serialize_all(ifp);
1656 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1657 		ifnet_deserialize_all(ifp);
1658 		if (error == 0)
1659 			getmicrotime(&ifp->if_lastchange);
1660 		break;
1661 
1662 	case SIOCSIFMTU:
1663 	{
1664 		u_long oldmtu = ifp->if_mtu;
1665 
1666 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1667 		if (error)
1668 			break;
1669 		if (ifp->if_ioctl == NULL) {
1670 			error = EOPNOTSUPP;
1671 			break;
1672 		}
1673 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1674 			error = EINVAL;
1675 			break;
1676 		}
1677 		ifnet_serialize_all(ifp);
1678 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1679 		ifnet_deserialize_all(ifp);
1680 		if (error == 0) {
1681 			getmicrotime(&ifp->if_lastchange);
1682 			rt_ifmsg(ifp);
1683 		}
1684 		/*
1685 		 * If the link MTU changed, do network layer specific procedure.
1686 		 */
1687 		if (ifp->if_mtu != oldmtu) {
1688 #ifdef INET6
1689 			nd6_setmtu(ifp);
1690 #endif
1691 		}
1692 		break;
1693 	}
1694 
1695 	case SIOCADDMULTI:
1696 	case SIOCDELMULTI:
1697 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1698 		if (error)
1699 			break;
1700 
1701 		/* Don't allow group membership on non-multicast interfaces. */
1702 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1703 			error = EOPNOTSUPP;
1704 			break;
1705 		}
1706 
1707 		/* Don't let users screw up protocols' entries. */
1708 		if (ifr->ifr_addr.sa_family != AF_LINK) {
1709 			error = EINVAL;
1710 			break;
1711 		}
1712 
1713 		if (cmd == SIOCADDMULTI) {
1714 			struct ifmultiaddr *ifma;
1715 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1716 		} else {
1717 			error = if_delmulti(ifp, &ifr->ifr_addr);
1718 		}
1719 		if (error == 0)
1720 			getmicrotime(&ifp->if_lastchange);
1721 		break;
1722 
1723 	case SIOCSIFPHYADDR:
1724 	case SIOCDIFPHYADDR:
1725 #ifdef INET6
1726 	case SIOCSIFPHYADDR_IN6:
1727 #endif
1728 	case SIOCSLIFPHYADDR:
1729         case SIOCSIFMEDIA:
1730 	case SIOCSIFGENERIC:
1731 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1732 		if (error)
1733 			break;
1734 		if (ifp->if_ioctl == 0) {
1735 			error = EOPNOTSUPP;
1736 			break;
1737 		}
1738 		ifnet_serialize_all(ifp);
1739 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1740 		ifnet_deserialize_all(ifp);
1741 		if (error == 0)
1742 			getmicrotime(&ifp->if_lastchange);
1743 		break;
1744 
1745 	case SIOCGIFSTATUS:
1746 		ifs = (struct ifstat *)data;
1747 		ifs->ascii[0] = '\0';
1748 		/* fall through */
1749 	case SIOCGIFPSRCADDR:
1750 	case SIOCGIFPDSTADDR:
1751 	case SIOCGLIFPHYADDR:
1752 	case SIOCGIFMEDIA:
1753 	case SIOCGIFGENERIC:
1754 		if (ifp->if_ioctl == NULL) {
1755 			error = EOPNOTSUPP;
1756 			break;
1757 		}
1758 		ifnet_serialize_all(ifp);
1759 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1760 		ifnet_deserialize_all(ifp);
1761 		break;
1762 
1763 	case SIOCSIFLLADDR:
1764 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1765 		if (error)
1766 			break;
1767 		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1768 				     ifr->ifr_addr.sa_len);
1769 		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1770 		break;
1771 
1772 	default:
1773 		oif_flags = ifp->if_flags;
1774 		if (so->so_proto == 0) {
1775 			error = EOPNOTSUPP;
1776 			break;
1777 		}
1778 #ifndef COMPAT_43
1779 		error = so_pru_control_direct(so, cmd, data, ifp);
1780 #else
1781 		ocmd = cmd;
1782 
1783 		switch (cmd) {
1784 		case SIOCSIFDSTADDR:
1785 		case SIOCSIFADDR:
1786 		case SIOCSIFBRDADDR:
1787 		case SIOCSIFNETMASK:
1788 #if BYTE_ORDER != BIG_ENDIAN
1789 			if (ifr->ifr_addr.sa_family == 0 &&
1790 			    ifr->ifr_addr.sa_len < 16) {
1791 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1792 				ifr->ifr_addr.sa_len = 16;
1793 			}
1794 #else
1795 			if (ifr->ifr_addr.sa_len == 0)
1796 				ifr->ifr_addr.sa_len = 16;
1797 #endif
1798 			break;
1799 		case OSIOCGIFADDR:
1800 			cmd = SIOCGIFADDR;
1801 			break;
1802 		case OSIOCGIFDSTADDR:
1803 			cmd = SIOCGIFDSTADDR;
1804 			break;
1805 		case OSIOCGIFBRDADDR:
1806 			cmd = SIOCGIFBRDADDR;
1807 			break;
1808 		case OSIOCGIFNETMASK:
1809 			cmd = SIOCGIFNETMASK;
1810 			break;
1811 		default:
1812 			break;
1813 		}
1814 
1815 		error = so_pru_control_direct(so, cmd, data, ifp);
1816 
1817 		switch (ocmd) {
1818 		case OSIOCGIFADDR:
1819 		case OSIOCGIFDSTADDR:
1820 		case OSIOCGIFBRDADDR:
1821 		case OSIOCGIFNETMASK:
1822 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1823 			break;
1824 		}
1825 #endif /* COMPAT_43 */
1826 
1827 		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1828 #ifdef INET6
1829 			DELAY(100);/* XXX: temporary workaround for fxp issue*/
1830 			if (ifp->if_flags & IFF_UP) {
1831 				crit_enter();
1832 				in6_if_up(ifp);
1833 				crit_exit();
1834 			}
1835 #endif
1836 		}
1837 		break;
1838 	}
1839 
1840 	mtx_unlock(&ifp->if_ioctl_mtx);
1841 	return (error);
1842 }
1843 
1844 /*
1845  * Set/clear promiscuous mode on interface ifp based on the truth value
1846  * of pswitch.  The calls are reference counted so that only the first
1847  * "on" request actually has an effect, as does the final "off" request.
1848  * Results are undefined if the "off" and "on" requests are not matched.
1849  */
1850 int
1851 ifpromisc(struct ifnet *ifp, int pswitch)
1852 {
1853 	struct ifreq ifr;
1854 	int error;
1855 	int oldflags;
1856 
1857 	oldflags = ifp->if_flags;
1858 	if (ifp->if_flags & IFF_PPROMISC) {
1859 		/* Do nothing if device is in permanently promiscuous mode */
1860 		ifp->if_pcount += pswitch ? 1 : -1;
1861 		return (0);
1862 	}
1863 	if (pswitch) {
1864 		/*
1865 		 * If the device is not configured up, we cannot put it in
1866 		 * promiscuous mode.
1867 		 */
1868 		if ((ifp->if_flags & IFF_UP) == 0)
1869 			return (ENETDOWN);
1870 		if (ifp->if_pcount++ != 0)
1871 			return (0);
1872 		ifp->if_flags |= IFF_PROMISC;
1873 		log(LOG_INFO, "%s: promiscuous mode enabled\n",
1874 		    ifp->if_xname);
1875 	} else {
1876 		if (--ifp->if_pcount > 0)
1877 			return (0);
1878 		ifp->if_flags &= ~IFF_PROMISC;
1879 		log(LOG_INFO, "%s: promiscuous mode disabled\n",
1880 		    ifp->if_xname);
1881 	}
1882 	ifr.ifr_flags = ifp->if_flags;
1883 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1884 	ifnet_serialize_all(ifp);
1885 	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1886 	ifnet_deserialize_all(ifp);
1887 	if (error == 0)
1888 		rt_ifmsg(ifp);
1889 	else
1890 		ifp->if_flags = oldflags;
1891 	return error;
1892 }
1893 
1894 /*
1895  * Return interface configuration
1896  * of system.  List may be used
1897  * in later ioctl's (above) to get
1898  * other information.
1899  */
1900 static int
1901 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1902 {
1903 	struct ifconf *ifc = (struct ifconf *)data;
1904 	struct ifnet *ifp;
1905 	struct sockaddr *sa;
1906 	struct ifreq ifr, *ifrp;
1907 	int space = ifc->ifc_len, error = 0;
1908 
1909 	ifrp = ifc->ifc_req;
1910 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1911 		struct ifaddr_container *ifac;
1912 		int addrs;
1913 
1914 		if (space <= sizeof ifr)
1915 			break;
1916 
1917 		/*
1918 		 * Zero the stack declared structure first to prevent
1919 		 * memory disclosure.
1920 		 */
1921 		bzero(&ifr, sizeof(ifr));
1922 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1923 		    >= sizeof(ifr.ifr_name)) {
1924 			error = ENAMETOOLONG;
1925 			break;
1926 		}
1927 
1928 		addrs = 0;
1929 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1930 			struct ifaddr *ifa = ifac->ifa;
1931 
1932 			if (space <= sizeof ifr)
1933 				break;
1934 			sa = ifa->ifa_addr;
1935 			if (cred->cr_prison &&
1936 			    prison_if(cred, sa))
1937 				continue;
1938 			addrs++;
1939 #ifdef COMPAT_43
1940 			if (cmd == OSIOCGIFCONF) {
1941 				struct osockaddr *osa =
1942 					 (struct osockaddr *)&ifr.ifr_addr;
1943 				ifr.ifr_addr = *sa;
1944 				osa->sa_family = sa->sa_family;
1945 				error = copyout(&ifr, ifrp, sizeof ifr);
1946 				ifrp++;
1947 			} else
1948 #endif
1949 			if (sa->sa_len <= sizeof(*sa)) {
1950 				ifr.ifr_addr = *sa;
1951 				error = copyout(&ifr, ifrp, sizeof ifr);
1952 				ifrp++;
1953 			} else {
1954 				if (space < (sizeof ifr) + sa->sa_len -
1955 					    sizeof(*sa))
1956 					break;
1957 				space -= sa->sa_len - sizeof(*sa);
1958 				error = copyout(&ifr, ifrp,
1959 						sizeof ifr.ifr_name);
1960 				if (error == 0)
1961 					error = copyout(sa, &ifrp->ifr_addr,
1962 							sa->sa_len);
1963 				ifrp = (struct ifreq *)
1964 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1965 			}
1966 			if (error)
1967 				break;
1968 			space -= sizeof ifr;
1969 		}
1970 		if (error)
1971 			break;
1972 		if (!addrs) {
1973 			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
1974 			error = copyout(&ifr, ifrp, sizeof ifr);
1975 			if (error)
1976 				break;
1977 			space -= sizeof ifr;
1978 			ifrp++;
1979 		}
1980 	}
1981 	ifc->ifc_len -= space;
1982 	return (error);
1983 }
1984 
1985 /*
1986  * Just like if_promisc(), but for all-multicast-reception mode.
1987  */
1988 int
1989 if_allmulti(struct ifnet *ifp, int onswitch)
1990 {
1991 	int error = 0;
1992 	struct ifreq ifr;
1993 
1994 	crit_enter();
1995 
1996 	if (onswitch) {
1997 		if (ifp->if_amcount++ == 0) {
1998 			ifp->if_flags |= IFF_ALLMULTI;
1999 			ifr.ifr_flags = ifp->if_flags;
2000 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2001 			ifnet_serialize_all(ifp);
2002 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2003 					      NULL);
2004 			ifnet_deserialize_all(ifp);
2005 		}
2006 	} else {
2007 		if (ifp->if_amcount > 1) {
2008 			ifp->if_amcount--;
2009 		} else {
2010 			ifp->if_amcount = 0;
2011 			ifp->if_flags &= ~IFF_ALLMULTI;
2012 			ifr.ifr_flags = ifp->if_flags;
2013 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2014 			ifnet_serialize_all(ifp);
2015 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2016 					      NULL);
2017 			ifnet_deserialize_all(ifp);
2018 		}
2019 	}
2020 
2021 	crit_exit();
2022 
2023 	if (error == 0)
2024 		rt_ifmsg(ifp);
2025 	return error;
2026 }
2027 
2028 /*
2029  * Add a multicast listenership to the interface in question.
2030  * The link layer provides a routine which converts
2031  */
2032 int
2033 if_addmulti(
2034 	struct ifnet *ifp,	/* interface to manipulate */
2035 	struct sockaddr *sa,	/* address to add */
2036 	struct ifmultiaddr **retifma)
2037 {
2038 	struct sockaddr *llsa, *dupsa;
2039 	int error;
2040 	struct ifmultiaddr *ifma;
2041 
2042 	/*
2043 	 * If the matching multicast address already exists
2044 	 * then don't add a new one, just add a reference
2045 	 */
2046 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2047 		if (sa_equal(sa, ifma->ifma_addr)) {
2048 			ifma->ifma_refcount++;
2049 			if (retifma)
2050 				*retifma = ifma;
2051 			return 0;
2052 		}
2053 	}
2054 
2055 	/*
2056 	 * Give the link layer a chance to accept/reject it, and also
2057 	 * find out which AF_LINK address this maps to, if it isn't one
2058 	 * already.
2059 	 */
2060 	if (ifp->if_resolvemulti) {
2061 		ifnet_serialize_all(ifp);
2062 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2063 		ifnet_deserialize_all(ifp);
2064 		if (error)
2065 			return error;
2066 	} else {
2067 		llsa = NULL;
2068 	}
2069 
2070 	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2071 	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2072 	bcopy(sa, dupsa, sa->sa_len);
2073 
2074 	ifma->ifma_addr = dupsa;
2075 	ifma->ifma_lladdr = llsa;
2076 	ifma->ifma_ifp = ifp;
2077 	ifma->ifma_refcount = 1;
2078 	ifma->ifma_protospec = 0;
2079 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2080 
2081 	/*
2082 	 * Some network interfaces can scan the address list at
2083 	 * interrupt time; lock them out.
2084 	 */
2085 	crit_enter();
2086 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2087 	crit_exit();
2088 	if (retifma)
2089 		*retifma = ifma;
2090 
2091 	if (llsa != NULL) {
2092 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2093 			if (sa_equal(ifma->ifma_addr, llsa))
2094 				break;
2095 		}
2096 		if (ifma) {
2097 			ifma->ifma_refcount++;
2098 		} else {
2099 			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2100 			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2101 			bcopy(llsa, dupsa, llsa->sa_len);
2102 			ifma->ifma_addr = dupsa;
2103 			ifma->ifma_ifp = ifp;
2104 			ifma->ifma_refcount = 1;
2105 			crit_enter();
2106 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2107 			crit_exit();
2108 		}
2109 	}
2110 	/*
2111 	 * We are certain we have added something, so call down to the
2112 	 * interface to let them know about it.
2113 	 */
2114 	crit_enter();
2115 	ifnet_serialize_all(ifp);
2116 	if (ifp->if_ioctl)
2117 		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2118 	ifnet_deserialize_all(ifp);
2119 	crit_exit();
2120 
2121 	return 0;
2122 }
2123 
2124 /*
2125  * Remove a reference to a multicast address on this interface.  Yell
2126  * if the request does not match an existing membership.
2127  */
2128 int
2129 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2130 {
2131 	struct ifmultiaddr *ifma;
2132 
2133 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2134 		if (sa_equal(sa, ifma->ifma_addr))
2135 			break;
2136 	if (ifma == NULL)
2137 		return ENOENT;
2138 
2139 	if (ifma->ifma_refcount > 1) {
2140 		ifma->ifma_refcount--;
2141 		return 0;
2142 	}
2143 
2144 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
2145 	sa = ifma->ifma_lladdr;
2146 	crit_enter();
2147 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2148 	/*
2149 	 * Make sure the interface driver is notified
2150 	 * in the case of a link layer mcast group being left.
2151 	 */
2152 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2153 		ifnet_serialize_all(ifp);
2154 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2155 		ifnet_deserialize_all(ifp);
2156 	}
2157 	crit_exit();
2158 	kfree(ifma->ifma_addr, M_IFMADDR);
2159 	kfree(ifma, M_IFMADDR);
2160 	if (sa == NULL)
2161 		return 0;
2162 
2163 	/*
2164 	 * Now look for the link-layer address which corresponds to
2165 	 * this network address.  It had been squirreled away in
2166 	 * ifma->ifma_lladdr for this purpose (so we don't have
2167 	 * to call ifp->if_resolvemulti() again), and we saved that
2168 	 * value in sa above.  If some nasty deleted the
2169 	 * link-layer address out from underneath us, we can deal because
2170 	 * the address we stored was is not the same as the one which was
2171 	 * in the record for the link-layer address.  (So we don't complain
2172 	 * in that case.)
2173 	 */
2174 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2175 		if (sa_equal(sa, ifma->ifma_addr))
2176 			break;
2177 	if (ifma == NULL)
2178 		return 0;
2179 
2180 	if (ifma->ifma_refcount > 1) {
2181 		ifma->ifma_refcount--;
2182 		return 0;
2183 	}
2184 
2185 	crit_enter();
2186 	ifnet_serialize_all(ifp);
2187 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2188 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2189 	ifnet_deserialize_all(ifp);
2190 	crit_exit();
2191 	kfree(ifma->ifma_addr, M_IFMADDR);
2192 	kfree(sa, M_IFMADDR);
2193 	kfree(ifma, M_IFMADDR);
2194 
2195 	return 0;
2196 }
2197 
2198 /*
2199  * Delete all multicast group membership for an interface.
2200  * Should be used to quickly flush all multicast filters.
2201  */
2202 void
2203 if_delallmulti(struct ifnet *ifp)
2204 {
2205 	struct ifmultiaddr *ifma;
2206 	struct ifmultiaddr *next;
2207 
2208 	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2209 		if_delmulti(ifp, ifma->ifma_addr);
2210 }
2211 
2212 
2213 /*
2214  * Set the link layer address on an interface.
2215  *
2216  * At this time we only support certain types of interfaces,
2217  * and we don't allow the length of the address to change.
2218  */
2219 int
2220 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2221 {
2222 	struct sockaddr_dl *sdl;
2223 	struct ifreq ifr;
2224 
2225 	sdl = IF_LLSOCKADDR(ifp);
2226 	if (sdl == NULL)
2227 		return (EINVAL);
2228 	if (len != sdl->sdl_alen)	/* don't allow length to change */
2229 		return (EINVAL);
2230 	switch (ifp->if_type) {
2231 	case IFT_ETHER:			/* these types use struct arpcom */
2232 	case IFT_XETHER:
2233 	case IFT_L2VLAN:
2234 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2235 		bcopy(lladdr, LLADDR(sdl), len);
2236 		break;
2237 	default:
2238 		return (ENODEV);
2239 	}
2240 	/*
2241 	 * If the interface is already up, we need
2242 	 * to re-init it in order to reprogram its
2243 	 * address filter.
2244 	 */
2245 	ifnet_serialize_all(ifp);
2246 	if ((ifp->if_flags & IFF_UP) != 0) {
2247 #ifdef INET
2248 		struct ifaddr_container *ifac;
2249 #endif
2250 
2251 		ifp->if_flags &= ~IFF_UP;
2252 		ifr.ifr_flags = ifp->if_flags;
2253 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2254 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2255 			      NULL);
2256 		ifp->if_flags |= IFF_UP;
2257 		ifr.ifr_flags = ifp->if_flags;
2258 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2259 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2260 				 NULL);
2261 #ifdef INET
2262 		/*
2263 		 * Also send gratuitous ARPs to notify other nodes about
2264 		 * the address change.
2265 		 */
2266 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2267 			struct ifaddr *ifa = ifac->ifa;
2268 
2269 			if (ifa->ifa_addr != NULL &&
2270 			    ifa->ifa_addr->sa_family == AF_INET)
2271 				arp_gratuitous(ifp, ifa);
2272 		}
2273 #endif
2274 	}
2275 	ifnet_deserialize_all(ifp);
2276 	return (0);
2277 }
2278 
2279 struct ifmultiaddr *
2280 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2281 {
2282 	struct ifmultiaddr *ifma;
2283 
2284 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2285 		if (sa_equal(ifma->ifma_addr, sa))
2286 			break;
2287 
2288 	return ifma;
2289 }
2290 
2291 /*
2292  * This function locates the first real ethernet MAC from a network
2293  * card and loads it into node, returning 0 on success or ENOENT if
2294  * no suitable interfaces were found.  It is used by the uuid code to
2295  * generate a unique 6-byte number.
2296  */
2297 int
2298 if_getanyethermac(uint16_t *node, int minlen)
2299 {
2300 	struct ifnet *ifp;
2301 	struct sockaddr_dl *sdl;
2302 
2303 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
2304 		if (ifp->if_type != IFT_ETHER)
2305 			continue;
2306 		sdl = IF_LLSOCKADDR(ifp);
2307 		if (sdl->sdl_alen < minlen)
2308 			continue;
2309 		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2310 		      minlen);
2311 		return(0);
2312 	}
2313 	return (ENOENT);
2314 }
2315 
2316 /*
2317  * The name argument must be a pointer to storage which will last as
2318  * long as the interface does.  For physical devices, the result of
2319  * device_get_name(dev) is a good choice and for pseudo-devices a
2320  * static string works well.
2321  */
2322 void
2323 if_initname(struct ifnet *ifp, const char *name, int unit)
2324 {
2325 	ifp->if_dname = name;
2326 	ifp->if_dunit = unit;
2327 	if (unit != IF_DUNIT_NONE)
2328 		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2329 	else
2330 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2331 }
2332 
2333 int
2334 if_printf(struct ifnet *ifp, const char *fmt, ...)
2335 {
2336 	__va_list ap;
2337 	int retval;
2338 
2339 	retval = kprintf("%s: ", ifp->if_xname);
2340 	__va_start(ap, fmt);
2341 	retval += kvprintf(fmt, ap);
2342 	__va_end(ap);
2343 	return (retval);
2344 }
2345 
2346 struct ifnet *
2347 if_alloc(uint8_t type)
2348 {
2349         struct ifnet *ifp;
2350 	size_t size;
2351 
2352 	/*
2353 	 * XXX temporary hack until arpcom is setup in if_l2com
2354 	 */
2355 	if (type == IFT_ETHER)
2356 		size = sizeof(struct arpcom);
2357 	else
2358 		size = sizeof(struct ifnet);
2359 
2360 	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2361 
2362 	ifp->if_type = type;
2363 
2364 	if (if_com_alloc[type] != NULL) {
2365 		ifp->if_l2com = if_com_alloc[type](type, ifp);
2366 		if (ifp->if_l2com == NULL) {
2367 			kfree(ifp, M_IFNET);
2368 			return (NULL);
2369 		}
2370 	}
2371 	return (ifp);
2372 }
2373 
2374 void
2375 if_free(struct ifnet *ifp)
2376 {
2377 	kfree(ifp, M_IFNET);
2378 }
2379 
2380 void
2381 ifq_set_classic(struct ifaltq *ifq)
2382 {
2383 	ifq->altq_enqueue = ifq_classic_enqueue;
2384 	ifq->altq_dequeue = ifq_classic_dequeue;
2385 	ifq->altq_request = ifq_classic_request;
2386 }
2387 
2388 int
2389 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2390 		    struct altq_pktattr *pa __unused)
2391 {
2392 	logifq(enqueue, ifq);
2393 	if (IF_QFULL(ifq)) {
2394 		m_freem(m);
2395 		return(ENOBUFS);
2396 	} else {
2397 		IF_ENQUEUE(ifq, m);
2398 		return(0);
2399 	}
2400 }
2401 
2402 struct mbuf *
2403 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2404 {
2405 	struct mbuf *m;
2406 
2407 	switch (op) {
2408 	case ALTDQ_POLL:
2409 		IF_POLL(ifq, m);
2410 		break;
2411 	case ALTDQ_REMOVE:
2412 		logifq(dequeue, ifq);
2413 		IF_DEQUEUE(ifq, m);
2414 		break;
2415 	default:
2416 		panic("unsupported ALTQ dequeue op: %d", op);
2417 	}
2418 	KKASSERT(mpolled == NULL || mpolled == m);
2419 	return(m);
2420 }
2421 
2422 int
2423 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2424 {
2425 	switch (req) {
2426 	case ALTRQ_PURGE:
2427 		IF_DRAIN(ifq);
2428 		break;
2429 	default:
2430 		panic("unsupported ALTQ request: %d", req);
2431 	}
2432 	return(0);
2433 }
2434 
2435 int
2436 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2437 {
2438 	struct ifaltq *ifq = &ifp->if_snd;
2439 	int running = 0, error, start = 0;
2440 
2441 	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2442 
2443 	ALTQ_LOCK(ifq);
2444 	error = ifq_enqueue_locked(ifq, m, pa);
2445 	if (error) {
2446 		ALTQ_UNLOCK(ifq);
2447 		return error;
2448 	}
2449 	if (!ifq->altq_started) {
2450 		/*
2451 		 * Hold the interlock of ifnet.if_start
2452 		 */
2453 		ifq->altq_started = 1;
2454 		start = 1;
2455 	}
2456 	ALTQ_UNLOCK(ifq);
2457 
2458 	ifp->if_obytes += m->m_pkthdr.len;
2459 	if (m->m_flags & M_MCAST)
2460 		ifp->if_omcasts++;
2461 
2462 	if (!start) {
2463 		logifstart(avoid, ifp);
2464 		return 0;
2465 	}
2466 
2467 	/*
2468 	 * Try to do direct ifnet.if_start first, if there is
2469 	 * contention on ifnet's serializer, ifnet.if_start will
2470 	 * be scheduled on ifnet's CPU.
2471 	 */
2472 	if (!ifnet_tryserialize_tx(ifp)) {
2473 		/*
2474 		 * ifnet serializer contention happened,
2475 		 * ifnet.if_start is scheduled on ifnet's
2476 		 * CPU, and we keep going.
2477 		 */
2478 		logifstart(contend_sched, ifp);
2479 		if_start_schedule(ifp);
2480 		return 0;
2481 	}
2482 
2483 	if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2484 		logifstart(run, ifp);
2485 		ifp->if_start(ifp);
2486 		if ((ifp->if_flags &
2487 		     (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2488 			running = 1;
2489 	}
2490 
2491 	ifnet_deserialize_tx(ifp);
2492 
2493 	if (if_start_need_schedule(ifq, running)) {
2494 		/*
2495 		 * More data need to be transmitted, ifnet.if_start is
2496 		 * scheduled on ifnet's CPU, and we keep going.
2497 		 * NOTE: ifnet.if_start interlock is not released.
2498 		 */
2499 		logifstart(sched, ifp);
2500 		if_start_schedule(ifp);
2501 	}
2502 	return 0;
2503 }
2504 
2505 void *
2506 ifa_create(int size, int flags)
2507 {
2508 	struct ifaddr *ifa;
2509 	int i;
2510 
2511 	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2512 
2513 	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2514 	if (ifa == NULL)
2515 		return NULL;
2516 
2517 	ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2518 				      M_IFADDR, M_WAITOK | M_ZERO);
2519 	ifa->ifa_ncnt = ncpus;
2520 	for (i = 0; i < ncpus; ++i) {
2521 		struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2522 
2523 		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2524 		ifac->ifa = ifa;
2525 		ifac->ifa_refcnt = 1;
2526 	}
2527 #ifdef IFADDR_DEBUG
2528 	kprintf("alloc ifa %p %d\n", ifa, size);
2529 #endif
2530 	return ifa;
2531 }
2532 
2533 void
2534 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2535 {
2536 	struct ifaddr *ifa = ifac->ifa;
2537 
2538 	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2539 	KKASSERT(ifac->ifa_refcnt == 0);
2540 	KASSERT(ifac->ifa_listmask == 0,
2541 		("ifa is still on %#x lists", ifac->ifa_listmask));
2542 
2543 	ifac->ifa_magic = IFA_CONTAINER_DEAD;
2544 
2545 #ifdef IFADDR_DEBUG_VERBOSE
2546 	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2547 #endif
2548 
2549 	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2550 		("invalid # of ifac, %d", ifa->ifa_ncnt));
2551 	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2552 #ifdef IFADDR_DEBUG
2553 		kprintf("free ifa %p\n", ifa);
2554 #endif
2555 		kfree(ifa->ifa_containers, M_IFADDR);
2556 		kfree(ifa, M_IFADDR);
2557 	}
2558 }
2559 
2560 static void
2561 ifa_iflink_dispatch(netmsg_t nmsg)
2562 {
2563 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2564 	struct ifaddr *ifa = msg->ifa;
2565 	struct ifnet *ifp = msg->ifp;
2566 	int cpu = mycpuid;
2567 	struct ifaddr_container *ifac;
2568 
2569 	crit_enter();
2570 
2571 	ifac = &ifa->ifa_containers[cpu];
2572 	ASSERT_IFAC_VALID(ifac);
2573 	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2574 		("ifaddr is on if_addrheads"));
2575 
2576 	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2577 	if (msg->tail)
2578 		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2579 	else
2580 		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2581 
2582 	crit_exit();
2583 
2584 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2585 }
2586 
2587 void
2588 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2589 {
2590 	struct netmsg_ifaddr msg;
2591 
2592 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2593 		    0, ifa_iflink_dispatch);
2594 	msg.ifa = ifa;
2595 	msg.ifp = ifp;
2596 	msg.tail = tail;
2597 
2598 	ifa_domsg(&msg.base.lmsg, 0);
2599 }
2600 
2601 static void
2602 ifa_ifunlink_dispatch(netmsg_t nmsg)
2603 {
2604 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2605 	struct ifaddr *ifa = msg->ifa;
2606 	struct ifnet *ifp = msg->ifp;
2607 	int cpu = mycpuid;
2608 	struct ifaddr_container *ifac;
2609 
2610 	crit_enter();
2611 
2612 	ifac = &ifa->ifa_containers[cpu];
2613 	ASSERT_IFAC_VALID(ifac);
2614 	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2615 		("ifaddr is not on if_addrhead"));
2616 
2617 	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2618 	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2619 
2620 	crit_exit();
2621 
2622 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2623 }
2624 
2625 void
2626 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2627 {
2628 	struct netmsg_ifaddr msg;
2629 
2630 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2631 		    0, ifa_ifunlink_dispatch);
2632 	msg.ifa = ifa;
2633 	msg.ifp = ifp;
2634 
2635 	ifa_domsg(&msg.base.lmsg, 0);
2636 }
2637 
2638 static void
2639 ifa_destroy_dispatch(netmsg_t nmsg)
2640 {
2641 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2642 
2643 	IFAFREE(msg->ifa);
2644 	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2645 }
2646 
2647 void
2648 ifa_destroy(struct ifaddr *ifa)
2649 {
2650 	struct netmsg_ifaddr msg;
2651 
2652 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2653 		    0, ifa_destroy_dispatch);
2654 	msg.ifa = ifa;
2655 
2656 	ifa_domsg(&msg.base.lmsg, 0);
2657 }
2658 
2659 struct lwkt_port *
2660 ifnet_portfn(int cpu)
2661 {
2662 	return &ifnet_threads[cpu].td_msgport;
2663 }
2664 
2665 void
2666 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2667 {
2668 	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2669 
2670 	if (next_cpu < ncpus)
2671 		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2672 	else
2673 		lwkt_replymsg(lmsg, 0);
2674 }
2675 
2676 int
2677 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2678 {
2679 	KKASSERT(cpu < ncpus);
2680 	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2681 }
2682 
2683 void
2684 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2685 {
2686 	KKASSERT(cpu < ncpus);
2687 	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2688 }
2689 
2690 /*
2691  * Generic netmsg service loop.  Some protocols may roll their own but all
2692  * must do the basic command dispatch function call done here.
2693  */
2694 static void
2695 ifnet_service_loop(void *arg __unused)
2696 {
2697 	netmsg_t msg;
2698 
2699 	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2700 		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2701 		msg->base.nm_dispatch(msg);
2702 	}
2703 }
2704 
2705 static void
2706 ifnetinit(void *dummy __unused)
2707 {
2708 	int i;
2709 
2710 	for (i = 0; i < ncpus; ++i) {
2711 		struct thread *thr = &ifnet_threads[i];
2712 
2713 		lwkt_create(ifnet_service_loop, NULL, NULL,
2714 			    thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2715 			    i, "ifnet %d", i);
2716 		netmsg_service_port_init(&thr->td_msgport);
2717 		lwkt_schedule(thr);
2718 	}
2719 }
2720 
2721 struct ifnet *
2722 ifnet_byindex(unsigned short idx)
2723 {
2724 	if (idx > if_index)
2725 		return NULL;
2726 	return ifindex2ifnet[idx];
2727 }
2728 
2729 struct ifaddr *
2730 ifaddr_byindex(unsigned short idx)
2731 {
2732 	struct ifnet *ifp;
2733 
2734 	ifp = ifnet_byindex(idx);
2735 	if (!ifp)
2736 		return NULL;
2737 	return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2738 }
2739 
2740 void
2741 if_register_com_alloc(u_char type,
2742     if_com_alloc_t *a, if_com_free_t *f)
2743 {
2744 
2745         KASSERT(if_com_alloc[type] == NULL,
2746             ("if_register_com_alloc: %d already registered", type));
2747         KASSERT(if_com_free[type] == NULL,
2748             ("if_register_com_alloc: %d free already registered", type));
2749 
2750         if_com_alloc[type] = a;
2751         if_com_free[type] = f;
2752 }
2753 
2754 void
2755 if_deregister_com_alloc(u_char type)
2756 {
2757 
2758         KASSERT(if_com_alloc[type] != NULL,
2759             ("if_deregister_com_alloc: %d not registered", type));
2760         KASSERT(if_com_free[type] != NULL,
2761             ("if_deregister_com_alloc: %d free not registered", type));
2762         if_com_alloc[type] = NULL;
2763         if_com_free[type] = NULL;
2764 }
2765 
2766 int
2767 if_ring_count2(int cnt, int cnt_max)
2768 {
2769 	int shift = 0;
2770 
2771 	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
2772 	    ("invalid ring count max %d", cnt_max));
2773 
2774 	if (cnt <= 0)
2775 		cnt = cnt_max;
2776 	if (cnt > ncpus2)
2777 		cnt = ncpus2;
2778 	if (cnt > cnt_max)
2779 		cnt = cnt_max;
2780 
2781 	while ((1 << (shift + 1)) <= cnt)
2782 		++shift;
2783 	cnt = 1 << shift;
2784 
2785 	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
2786 	    ("calculate cnt %d, ncpus2 %d, cnt max %d",
2787 	     cnt, ncpus2, cnt_max));
2788 	return cnt;
2789 }
2790