xref: /dragonfly/sys/net/if.c (revision 82730a9c)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.3 (Berkeley) 1/4/94
30  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_ifpoll.h"
37 
38 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/priv.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/socketops.h>
48 #include <sys/kernel.h>
49 #include <sys/ktr.h>
50 #include <sys/mutex.h>
51 #include <sys/sockio.h>
52 #include <sys/syslog.h>
53 #include <sys/sysctl.h>
54 #include <sys/domain.h>
55 #include <sys/thread.h>
56 #include <sys/serialize.h>
57 #include <sys/bus.h>
58 
59 #include <sys/thread2.h>
60 #include <sys/msgport2.h>
61 #include <sys/mutex2.h>
62 
63 #include <net/if.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_types.h>
67 #include <net/if_var.h>
68 #include <net/ifq_var.h>
69 #include <net/radix.h>
70 #include <net/route.h>
71 #include <net/if_clone.h>
72 #include <net/netisr2.h>
73 #include <net/netmsg2.h>
74 
75 #include <machine/atomic.h>
76 #include <machine/stdarg.h>
77 #include <machine/smp.h>
78 
79 #if defined(INET) || defined(INET6)
80 /*XXX*/
81 #include <netinet/in.h>
82 #include <netinet/in_var.h>
83 #include <netinet/if_ether.h>
84 #ifdef INET6
85 #include <netinet6/in6_var.h>
86 #include <netinet6/in6_ifattach.h>
87 #endif
88 #endif
89 
90 #if defined(COMPAT_43)
91 #include <emulation/43bsd/43bsd_socket.h>
92 #endif /* COMPAT_43 */
93 
94 struct netmsg_ifaddr {
95 	struct netmsg_base base;
96 	struct ifaddr	*ifa;
97 	struct ifnet	*ifp;
98 	int		tail;
99 };
100 
101 struct ifsubq_stage_head {
102 	TAILQ_HEAD(, ifsubq_stage)	stg_head;
103 } __cachealign;
104 
105 /*
106  * System initialization
107  */
108 static void	if_attachdomain(void *);
109 static void	if_attachdomain1(struct ifnet *);
110 static int	ifconf(u_long, caddr_t, struct ucred *);
111 static void	ifinit(void *);
112 static void	ifnetinit(void *);
113 static void	if_slowtimo(void *);
114 static void	link_rtrequest(int, struct rtentry *);
115 static int	if_rtdel(struct radix_node *, void *);
116 
117 /* Helper functions */
118 static void	ifsq_watchdog_reset(struct ifsubq_watchdog *);
119 
120 #ifdef INET6
121 /*
122  * XXX: declared here to avoid including many inet6-related files;
123  * should this be more generalized?
124  */
125 extern void	nd6_setmtu(struct ifnet *);
126 #endif
127 
128 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
129 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
130 
131 static int ifsq_stage_cntmax = 4;
132 TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
133 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
134     &ifsq_stage_cntmax, 0, "ifq staging packet count max");
135 
136 static int if_stats_compat = 0;
137 SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
138     &if_stats_compat, 0, "Maintain old ifnet stats for compatibility");
139 
140 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
141 /* Must be after netisr_init */
142 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
143 
144 static  if_com_alloc_t *if_com_alloc[256];
145 static  if_com_free_t *if_com_free[256];
146 
147 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
148 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
149 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
150 
151 int			ifqmaxlen = IFQ_MAXLEN;
152 struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
153 
154 struct callout		if_slowtimo_timer;
155 
156 int			if_index = 0;
157 struct ifnet		**ifindex2ifnet = NULL;
158 static struct thread	ifnet_threads[MAXCPU];
159 
160 static struct ifsubq_stage_head	ifsubq_stage_heads[MAXCPU];
161 
162 #ifdef notyet
163 #define IFQ_KTR_STRING		"ifq=%p"
164 #define IFQ_KTR_ARGS	struct ifaltq *ifq
165 #ifndef KTR_IFQ
166 #define KTR_IFQ			KTR_ALL
167 #endif
168 KTR_INFO_MASTER(ifq);
169 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
170 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
171 #define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)
172 
173 #define IF_START_KTR_STRING	"ifp=%p"
174 #define IF_START_KTR_ARGS	struct ifnet *ifp
175 #ifndef KTR_IF_START
176 #define KTR_IF_START		KTR_ALL
177 #endif
178 KTR_INFO_MASTER(if_start);
179 KTR_INFO(KTR_IF_START, if_start, run, 0,
180 	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
181 KTR_INFO(KTR_IF_START, if_start, sched, 1,
182 	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
183 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
184 	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
185 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
186 	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
187 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
188 	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
189 #define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
190 #endif
191 
192 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
193 
194 /*
195  * Network interface utility routines.
196  *
197  * Routines with ifa_ifwith* names take sockaddr *'s as
198  * parameters.
199  */
200 /* ARGSUSED*/
201 void
202 ifinit(void *dummy)
203 {
204 	struct ifnet *ifp;
205 
206 	callout_init(&if_slowtimo_timer);
207 
208 	crit_enter();
209 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
210 		if (ifp->if_snd.altq_maxlen == 0) {
211 			if_printf(ifp, "XXX: driver didn't set altq_maxlen\n");
212 			ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
213 		}
214 	}
215 	crit_exit();
216 
217 	if_slowtimo(0);
218 }
219 
220 static void
221 ifsq_ifstart_ipifunc(void *arg)
222 {
223 	struct ifaltq_subque *ifsq = arg;
224 	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);
225 
226 	crit_enter();
227 	if (lmsg->ms_flags & MSGF_DONE)
228 		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
229 	crit_exit();
230 }
231 
232 static __inline void
233 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
234 {
235 	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
236 	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
237 	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
238 	stage->stg_cnt = 0;
239 	stage->stg_len = 0;
240 }
241 
242 static __inline void
243 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
244 {
245 	KKASSERT((stage->stg_flags &
246 	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
247 	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
248 	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
249 }
250 
251 /*
252  * Schedule ifnet.if_start on the subqueue owner CPU
253  */
254 static void
255 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
256 {
257 	int cpu;
258 
259 	if (!force && curthread->td_type == TD_TYPE_NETISR &&
260 	    ifsq_stage_cntmax > 0) {
261 		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
262 
263 		stage->stg_cnt = 0;
264 		stage->stg_len = 0;
265 		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
266 			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
267 		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
268 		return;
269 	}
270 
271 	cpu = ifsq_get_cpuid(ifsq);
272 	if (cpu != mycpuid)
273 		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
274 	else
275 		ifsq_ifstart_ipifunc(ifsq);
276 }
277 
278 /*
279  * NOTE:
280  * This function releases the ifnet.if_start subqueue interlock
281  * if ifnet.if_start for the subqueue does not need to be scheduled.
282  */
283 static __inline int
284 ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
285 {
286 	if (!running || ifsq_is_empty(ifsq)
287 #ifdef ALTQ
288 	    || ifsq->ifsq_altq->altq_tbr != NULL
289 #endif
290 	) {
291 		ALTQ_SQ_LOCK(ifsq);
292 		/*
293 		 * ifnet.if_start subqueue interlock is released, if:
294 		 * 1) Hardware cannot take any packets, because
295 		 *    o  interface is marked down
296 		 *    o  hardware queue is full (ifsq_is_oactive)
297 		 *    In the second case, a hardware interrupt
298 		 *    or polling(4) will call/schedule ifnet.if_start
299 		 *    on the subqueue once the hardware queue is ready.
300 		 * 2) There is no packet in the subqueue.
301 		 *    Further ifq_dispatch or ifq_handoff will call/
302 		 *    schedule ifnet.if_start on the subqueue.
303 		 * 3) TBR is used and it does not allow further
304 		 *    dequeueing.
305 		 *    TBR callout will call ifnet.if_start on the
306 		 *    subqueue.
307 		 */
308 		if (!running || !ifsq_data_ready(ifsq)) {
309 			ifsq_clr_started(ifsq);
310 			ALTQ_SQ_UNLOCK(ifsq);
311 			return 0;
312 		}
313 		ALTQ_SQ_UNLOCK(ifsq);
314 	}
315 	return 1;
316 }
317 
318 static void
319 ifsq_ifstart_dispatch(netmsg_t msg)
320 {
321 	struct lwkt_msg *lmsg = &msg->base.lmsg;
322 	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
323 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
324 	struct globaldata *gd = mycpu;
325 	int running = 0, need_sched;
326 
327 	crit_enter_gd(gd);
328 
329 	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
330 
331 	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
332 		/*
333 		 * We need to chase the subqueue owner CPU change.
334 		 */
335 		ifsq_ifstart_schedule(ifsq, 1);
336 		crit_exit_gd(gd);
337 		return;
338 	}
339 
340 	ifsq_serialize_hw(ifsq);
341 	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
342 		ifp->if_start(ifp, ifsq);
343 		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
344 			running = 1;
345 	}
346 	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
347 	ifsq_deserialize_hw(ifsq);
348 
349 	if (need_sched) {
350 		/*
350 		 * More data needs to be transmitted; ifnet.if_start is
351 		 * scheduled on the subqueue owner CPU and we keep going.
353 		 * NOTE: ifnet.if_start subqueue interlock is not released.
354 		 */
355 		ifsq_ifstart_schedule(ifsq, 0);
356 	}
357 
358 	crit_exit_gd(gd);
359 }
360 
361 /* Device driver ifnet.if_start helper function */
362 void
363 ifsq_devstart(struct ifaltq_subque *ifsq)
364 {
365 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
366 	int running = 0;
367 
368 	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
369 
370 	ALTQ_SQ_LOCK(ifsq);
371 	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
372 		ALTQ_SQ_UNLOCK(ifsq);
373 		return;
374 	}
375 	ifsq_set_started(ifsq);
376 	ALTQ_SQ_UNLOCK(ifsq);
377 
378 	ifp->if_start(ifp, ifsq);
379 
380 	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
381 		running = 1;
382 
383 	if (ifsq_ifstart_need_schedule(ifsq, running)) {
384 		/*
385 		 * More data needs to be transmitted; ifnet.if_start is
386 		 * scheduled on the ifnet's CPU and we keep going.
387 		 * NOTE: ifnet.if_start interlock is not released.
388 		 */
389 		ifsq_ifstart_schedule(ifsq, 0);
390 	}
391 }
392 
393 void
394 if_devstart(struct ifnet *ifp)
395 {
396 	ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
397 }
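
/*
 * A minimal sketch of how a hypothetical foo(4) driver would use
 * if_devstart() from its TX-completion path.  The foo_* names (and
 * the ifsq_clr_oactive() helper, assumed from ifq_var.h) are
 * illustrative, not part of this file.  if_devstart() requires the
 * subqueue's hardware serializer to be held, which is the case in an
 * interrupt handler registered with the ifnet serializer.
 */
#ifdef notnow
static void
foo_txeof(struct foo_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	/* ... reclaim completed TX descriptors ... */
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
	ifp->if_timer = 0;

	/* Restart transmission if packets are still queued. */
	if_devstart(ifp);
}
#endif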
398 
399 /* Device driver ifnet.if_start schedule helper function */
400 void
401 ifsq_devstart_sched(struct ifaltq_subque *ifsq)
402 {
403 	ifsq_ifstart_schedule(ifsq, 1);
404 }
405 
406 void
407 if_devstart_sched(struct ifnet *ifp)
408 {
409 	ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
410 }
411 
412 static void
413 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
414 {
415 	lwkt_serialize_enter(ifp->if_serializer);
416 }
417 
418 static void
419 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
420 {
421 	lwkt_serialize_exit(ifp->if_serializer);
422 }
423 
424 static int
425 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
426 {
427 	return lwkt_serialize_try(ifp->if_serializer);
428 }
429 
430 #ifdef INVARIANTS
431 static void
432 if_default_serialize_assert(struct ifnet *ifp,
433 			    enum ifnet_serialize slz __unused,
434 			    boolean_t serialized)
435 {
436 	if (serialized)
437 		ASSERT_SERIALIZED(ifp->if_serializer);
438 	else
439 		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
440 }
441 #endif
442 
443 /*
444  * Attach an interface to the list of "active" interfaces.
445  *
446  * The serializer is optional.
447  */
448 void
449 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
450 {
451 	unsigned socksize, ifasize;
452 	int namelen, masklen;
453 	struct sockaddr_dl *sdl;
454 	struct ifaddr *ifa;
455 	struct ifaltq *ifq;
456 	int i, q;
457 
458 	static int if_indexlim = 8;
459 
460 	if (ifp->if_serialize != NULL) {
461 		KASSERT(ifp->if_deserialize != NULL &&
462 			ifp->if_tryserialize != NULL &&
463 			ifp->if_serialize_assert != NULL,
464 			("serialize functions are partially setup"));
465 
466 		/*
467 		 * If the device supplies serialize functions,
468 		 * then clear if_serializer to catch any invalid
469 		 * usage of this field.
470 		 */
471 		KASSERT(serializer == NULL,
472 			("both serialize functions and default serializer "
473 			 "are supplied"));
474 		ifp->if_serializer = NULL;
475 	} else {
476 		KASSERT(ifp->if_deserialize == NULL &&
477 			ifp->if_tryserialize == NULL &&
478 			ifp->if_serialize_assert == NULL,
479 			("serialize functions are partially setup"));
480 		ifp->if_serialize = if_default_serialize;
481 		ifp->if_deserialize = if_default_deserialize;
482 		ifp->if_tryserialize = if_default_tryserialize;
483 #ifdef INVARIANTS
484 		ifp->if_serialize_assert = if_default_serialize_assert;
485 #endif
486 
487 		/*
488 		 * The serializer can be passed in from the device,
489 		 * allowing the same serializer to be used for both
490 		 * the interrupt interlock and the device queue.
491 		 * If not specified, the netif structure will use an
492 		 * embedded serializer.
493 		 */
494 		if (serializer == NULL) {
495 			serializer = &ifp->if_default_serializer;
496 			lwkt_serialize_init(serializer);
497 		}
498 		ifp->if_serializer = serializer;
499 	}
500 
501 	mtx_init(&ifp->if_ioctl_mtx);
502 	mtx_lock(&ifp->if_ioctl_mtx);
503 
504 	lwkt_gettoken(&ifnet_token);	/* protect if_index and ifnet tailq */
505 	ifp->if_index = ++if_index;
506 
507 	/*
508 	 * XXX -
509 	 * The old code would work if the interface passed a pre-existing
510 	 * chain of ifaddrs to this code.  We don't trust our callers to
511 	 * properly initialize the tailq, however, so we no longer allow
512 	 * this unlikely case.
513 	 */
514 	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
515 				    M_IFADDR, M_WAITOK | M_ZERO);
516 	for (i = 0; i < ncpus; ++i)
517 		TAILQ_INIT(&ifp->if_addrheads[i]);
518 
519 	TAILQ_INIT(&ifp->if_prefixhead);
520 	TAILQ_INIT(&ifp->if_multiaddrs);
521 	TAILQ_INIT(&ifp->if_groups);
522 	getmicrotime(&ifp->if_lastchange);
523 	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
524 		unsigned int n;
525 		struct ifnet **q;
526 
527 		if_indexlim <<= 1;
528 
529 		/* grow ifindex2ifnet */
530 		n = if_indexlim * sizeof(*q);
531 		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
532 		if (ifindex2ifnet) {
533 			bcopy(ifindex2ifnet, q, n/2);
534 			kfree(ifindex2ifnet, M_IFADDR);
535 		}
536 		ifindex2ifnet = q;
537 	}
538 
539 	ifindex2ifnet[if_index] = ifp;
540 
541 	/*
542 	 * Create a link-level name for this device.
543 	 */
544 	namelen = strlen(ifp->if_xname);
545 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
546 	socksize = masklen + ifp->if_addrlen;
547 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
548 	if (socksize < sizeof(*sdl))
549 		socksize = sizeof(*sdl);
550 	socksize = ROUNDUP(socksize);
551 #undef ROUNDUP
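	/*
	 * ROUNDUP() above rounds up to the next multiple of sizeof(long)
	 * so the sdl and netmask storage stay long-aligned; e.g. with
	 * 8-byte longs, ROUNDUP(21) == 1 + ((21 - 1) | 7) == 24.
	 */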
552 	ifasize = sizeof(struct ifaddr) + 2 * socksize;
553 	ifa = ifa_create(ifasize, M_WAITOK);
554 	sdl = (struct sockaddr_dl *)(ifa + 1);
555 	sdl->sdl_len = socksize;
556 	sdl->sdl_family = AF_LINK;
557 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
558 	sdl->sdl_nlen = namelen;
559 	sdl->sdl_index = ifp->if_index;
560 	sdl->sdl_type = ifp->if_type;
561 	ifp->if_lladdr = ifa;
562 	ifa->ifa_ifp = ifp;
563 	ifa->ifa_rtrequest = link_rtrequest;
564 	ifa->ifa_addr = (struct sockaddr *)sdl;
565 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
566 	ifa->ifa_netmask = (struct sockaddr *)sdl;
567 	sdl->sdl_len = masklen;
568 	while (namelen != 0)
569 		sdl->sdl_data[--namelen] = 0xff;
570 	ifa_iflink(ifa, ifp, 0 /* Insert head */);
571 
572 	ifp->if_data_pcpu = kmalloc_cachealign(
573 	    ncpus * sizeof(struct ifdata_pcpu), M_DEVBUF, M_WAITOK | M_ZERO);
574 
575 	if (ifp->if_mapsubq == NULL)
576 		ifp->if_mapsubq = ifq_mapsubq_default;
577 
578 	ifq = &ifp->if_snd;
579 	ifq->altq_type = 0;
580 	ifq->altq_disc = NULL;
581 	ifq->altq_flags &= ALTQF_CANTCHANGE;
582 	ifq->altq_tbr = NULL;
583 	ifq->altq_ifp = ifp;
584 
585 	if (ifq->altq_subq_cnt <= 0)
586 		ifq->altq_subq_cnt = 1;
587 	ifq->altq_subq = kmalloc_cachealign(
588 	    ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
589 	    M_DEVBUF, M_WAITOK | M_ZERO);
590 
591 	if (ifq->altq_maxlen == 0) {
592 		if_printf(ifp, "driver didn't set altq_maxlen\n");
593 		ifq_set_maxlen(ifq, ifqmaxlen);
594 	}
595 
596 	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
597 		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
598 
599 		ALTQ_SQ_LOCK_INIT(ifsq);
600 		ifsq->ifsq_index = q;
601 
602 		ifsq->ifsq_altq = ifq;
603 		ifsq->ifsq_ifp = ifp;
604 
605 		ifsq->ifsq_maxlen = ifq->altq_maxlen;
606 		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
607 		ifsq->ifsq_prepended = NULL;
608 		ifsq->ifsq_started = 0;
609 		ifsq->ifsq_hw_oactive = 0;
610 		ifsq_set_cpuid(ifsq, 0);
611 		if (ifp->if_serializer != NULL)
612 			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);
613 
614 		ifsq->ifsq_stage =
615 		    kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
616 		    M_DEVBUF, M_WAITOK | M_ZERO);
617 		for (i = 0; i < ncpus; ++i)
618 			ifsq->ifsq_stage[i].stg_subq = ifsq;
619 
620 		ifsq->ifsq_ifstart_nmsg =
621 		    kmalloc(ncpus * sizeof(struct netmsg_base),
622 		    M_LWKTMSG, M_WAITOK);
623 		for (i = 0; i < ncpus; ++i) {
624 			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
625 			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
626 			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
627 		}
628 	}
629 	ifq_set_classic(ifq);
630 
631 	if (!SLIST_EMPTY(&domains))
632 		if_attachdomain1(ifp);
633 
634 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
635 	lwkt_reltoken(&ifnet_token);
636 
637 	/* Announce the interface. */
638 	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
639 	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
640 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
641 
642 	mtx_unlock(&ifp->if_ioctl_mtx);
643 }
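
/*
 * A sketch of the attach path most drivers follow (foo_* names are
 * illustrative assumptions, not part of this file).  Passing a NULL
 * serializer makes if_attach() fall back to the embedded default
 * serializer; Ethernet drivers normally go through ether_ifattach(),
 * which calls if_attach() internally.
 */
#ifdef notnow
static void
foo_ifattach(struct foo_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	if_initname(ifp, "foo", sc->foo_unit);
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = foo_ioctl;
	ifp->if_start = foo_start;
	ifp->if_watchdog = foo_watchdog;
	ifq_set_maxlen(&ifp->if_snd, FOO_TX_NDESC - 1);

	if_attach(ifp, NULL);
}
#endif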
644 
645 static void
646 if_attachdomain(void *dummy)
647 {
648 	struct ifnet *ifp;
649 
650 	crit_enter();
651 	TAILQ_FOREACH(ifp, &ifnet, if_list)
652 		if_attachdomain1(ifp);
653 	crit_exit();
654 }
655 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
656 	if_attachdomain, NULL);
657 
658 static void
659 if_attachdomain1(struct ifnet *ifp)
660 {
661 	struct domain *dp;
662 
663 	crit_enter();
664 
665 	/* address family dependent data region */
666 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
667 	SLIST_FOREACH(dp, &domains, dom_next)
668 		if (dp->dom_ifattach)
669 			ifp->if_afdata[dp->dom_family] =
670 				(*dp->dom_ifattach)(ifp);
671 	crit_exit();
672 }
673 
674 /*
675  * Purge all addresses whose type is _not_ AF_LINK
676  */
677 void
678 if_purgeaddrs_nolink(struct ifnet *ifp)
679 {
680 	struct ifaddr_container *ifac, *next;
681 
682 	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
683 			      ifa_link, next) {
684 		struct ifaddr *ifa = ifac->ifa;
685 
686 		/* Leave link ifaddr as it is */
687 		if (ifa->ifa_addr->sa_family == AF_LINK)
688 			continue;
689 #ifdef INET
690 		/* XXX: Ugly!! ad hoc just for INET */
691 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
692 			struct ifaliasreq ifr;
693 #ifdef IFADDR_DEBUG_VERBOSE
694 			int i;
695 
696 			kprintf("purge in4 addr %p: ", ifa);
697 			for (i = 0; i < ncpus; ++i)
698 				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
699 			kprintf("\n");
700 #endif
701 
702 			bzero(&ifr, sizeof ifr);
703 			ifr.ifra_addr = *ifa->ifa_addr;
704 			if (ifa->ifa_dstaddr)
705 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
706 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
707 				       NULL) == 0)
708 				continue;
709 		}
710 #endif /* INET */
711 #ifdef INET6
712 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
713 #ifdef IFADDR_DEBUG_VERBOSE
714 			int i;
715 
716 			kprintf("purge in6 addr %p: ", ifa);
717 			for (i = 0; i < ncpus; ++i)
718 				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
719 			kprintf("\n");
720 #endif
721 
722 			in6_purgeaddr(ifa);
723 			/* ifp_addrhead is already updated */
724 			continue;
725 		}
726 #endif /* INET6 */
727 		ifa_ifunlink(ifa, ifp);
728 		ifa_destroy(ifa);
729 	}
730 }
731 
732 static void
733 ifq_stage_detach_handler(netmsg_t nmsg)
734 {
735 	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
736 	int q;
737 
738 	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
739 		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
740 		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
741 
742 		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
743 			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
744 	}
745 	lwkt_replymsg(&nmsg->lmsg, 0);
746 }
747 
748 static void
749 ifq_stage_detach(struct ifaltq *ifq)
750 {
751 	struct netmsg_base base;
752 	int cpu;
753 
754 	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
755 	    ifq_stage_detach_handler);
756 	base.lmsg.u.ms_resultp = ifq;
757 
758 	for (cpu = 0; cpu < ncpus; ++cpu)
759 		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
760 }
761 
762 struct netmsg_if_rtdel {
763 	struct netmsg_base	base;
764 	struct ifnet		*ifp;
765 };
766 
767 static void
768 if_rtdel_dispatch(netmsg_t msg)
769 {
770 	struct netmsg_if_rtdel *rmsg = (void *)msg;
771 	int i, nextcpu, cpu;
772 
773 	cpu = mycpuid;
774 	for (i = 1; i <= AF_MAX; i++) {
775 		struct radix_node_head	*rnh;
776 
777 		if ((rnh = rt_tables[cpu][i]) == NULL)
778 			continue;
779 		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
780 	}
781 
782 	nextcpu = cpu + 1;
783 	if (nextcpu < ncpus)
784 		lwkt_forwardmsg(netisr_cpuport(nextcpu), &rmsg->base.lmsg);
785 	else
786 		lwkt_replymsg(&rmsg->base.lmsg, 0);
787 }
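
/*
 * NOTE: the dispatch above is DragonFly's per-cpu message circuit:
 * rt_domsg_global() (see if_detach() below) starts the message on the
 * first netisr cpu, each cpu walks its own rt_tables[] replica and
 * forwards the same message to the next cpu, and the last cpu replies
 * to the blocked originator.
 */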
788 
789 /*
790  * Detach an interface, removing it from the
791  * list of "active" interfaces.
792  */
793 void
794 if_detach(struct ifnet *ifp)
795 {
796 	struct netmsg_if_rtdel msg;
797 	struct domain *dp;
798 	int q;
799 
800 	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
801 
802 	/*
803 	 * Remove routes and flush queues.
804 	 */
805 	crit_enter();
806 #ifdef IFPOLL_ENABLE
807 	if (ifp->if_flags & IFF_NPOLLING)
808 		ifpoll_deregister(ifp);
809 #endif
810 	if_down(ifp);
811 
812 #ifdef ALTQ
813 	if (ifq_is_enabled(&ifp->if_snd))
814 		altq_disable(&ifp->if_snd);
815 	if (ifq_is_attached(&ifp->if_snd))
816 		altq_detach(&ifp->if_snd);
817 #endif
818 
819 	/*
820 	 * Clean up all addresses.
821 	 */
822 	ifp->if_lladdr = NULL;
823 
824 	if_purgeaddrs_nolink(ifp);
825 	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
826 		struct ifaddr *ifa;
827 
828 		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
829 		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
830 			("non-link ifaddr is left on if_addrheads"));
831 
832 		ifa_ifunlink(ifa, ifp);
833 		ifa_destroy(ifa);
834 		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
835 			("there are still ifaddrs left on if_addrheads"));
836 	}
837 
838 #ifdef INET
839 	/*
840 	 * Remove all IPv4 kernel structures related to ifp.
841 	 */
842 	in_ifdetach(ifp);
843 #endif
844 
845 #ifdef INET6
846 	/*
847 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
848 	 * before removing routing entries below, since IPv6 interface direct
849 	 * routes are expected to be removed by the IPv6-specific kernel API.
850 	 * Otherwise, the kernel will detect some inconsistency and bark it.
851 	 */
852 	in6_ifdetach(ifp);
853 #endif
854 
855 	/*
856 	 * Delete all remaining routes using this interface
857 	 */
858 	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
859 	    if_rtdel_dispatch);
860 	msg.ifp = ifp;
861 	rt_domsg_global(&msg.base);
862 
863 	/* Announce that the interface is gone. */
864 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
865 	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
866 
867 	SLIST_FOREACH(dp, &domains, dom_next)
868 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
869 			(*dp->dom_ifdetach)(ifp,
870 				ifp->if_afdata[dp->dom_family]);
871 
872 	/*
873 	 * Remove interface from ifindex2ifnet[] and maybe decrement if_index.
874 	 */
875 	lwkt_gettoken(&ifnet_token);
876 	ifindex2ifnet[ifp->if_index] = NULL;
877 	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
878 		if_index--;
879 	TAILQ_REMOVE(&ifnet, ifp, if_link);
880 	lwkt_reltoken(&ifnet_token);
881 
882 	kfree(ifp->if_addrheads, M_IFADDR);
883 
884 	lwkt_synchronize_ipiqs("if_detach");
885 	ifq_stage_detach(&ifp->if_snd);
886 
887 	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
888 		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];
889 
890 		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
891 		kfree(ifsq->ifsq_stage, M_DEVBUF);
892 	}
893 	kfree(ifp->if_snd.altq_subq, M_DEVBUF);
894 
895 	kfree(ifp->if_data_pcpu, M_DEVBUF);
896 
897 	crit_exit();
898 }
899 
900 /*
901  * Create interface group without members
902  */
903 struct ifg_group *
904 if_creategroup(const char *groupname)
905 {
906 	struct ifg_group	*ifg = NULL;
907 
908 	if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
909 	    M_TEMP, M_NOWAIT)) == NULL)
910 		return (NULL);
911 
912 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
913 	ifg->ifg_refcnt = 0;
914 	ifg->ifg_carp_demoted = 0;
915 	TAILQ_INIT(&ifg->ifg_members);
916 #if NPF > 0
917 	pfi_attach_ifgroup(ifg);
918 #endif
919 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
920 
921 	return (ifg);
922 }
923 
924 /*
925  * Add a group to an interface
926  */
927 int
928 if_addgroup(struct ifnet *ifp, const char *groupname)
929 {
930 	struct ifg_list		*ifgl;
931 	struct ifg_group	*ifg = NULL;
932 	struct ifg_member	*ifgm;
933 
934 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
935 	    groupname[strlen(groupname) - 1] <= '9')
936 		return (EINVAL);
937 
938 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
939 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
940 			return (EEXIST);
941 
942 	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
943 		return (ENOMEM);
944 
945 	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
946 		kfree(ifgl, M_TEMP);
947 		return (ENOMEM);
948 	}
949 
950 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
951 		if (!strcmp(ifg->ifg_group, groupname))
952 			break;
953 
954 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
955 		kfree(ifgl, M_TEMP);
956 		kfree(ifgm, M_TEMP);
957 		return (ENOMEM);
958 	}
959 
960 	ifg->ifg_refcnt++;
961 	ifgl->ifgl_group = ifg;
962 	ifgm->ifgm_ifp = ifp;
963 
964 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
965 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
966 
967 #if NPF > 0
968 	pfi_group_change(groupname);
969 #endif
970 
971 	return (0);
972 }
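
/*
 * A minimal usage sketch (hypothetical caller).  Note that the check
 * above rejects group names ending in a digit, presumably so a group
 * name can never be confused with an interface name such as "em0".
 */
#ifdef notnow
static int
foo_join_egress(struct ifnet *ifp)
{
	int error;

	error = if_addgroup(ifp, "egress");
	if (error)
		return (error);
	/* ... and later, the matched removal ... */
	return (if_delgroup(ifp, "egress"));
}
#endif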
973 
974 /*
975  * Remove a group from an interface
976  */
977 int
978 if_delgroup(struct ifnet *ifp, const char *groupname)
979 {
980 	struct ifg_list		*ifgl;
981 	struct ifg_member	*ifgm;
982 
983 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
984 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
985 			break;
986 	if (ifgl == NULL)
987 		return (ENOENT);
988 
989 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
990 
991 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
992 		if (ifgm->ifgm_ifp == ifp)
993 			break;
994 
995 	if (ifgm != NULL) {
996 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
997 		kfree(ifgm, M_TEMP);
998 	}
999 
1000 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1001 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
1002 #if NPF > 0
1003 		pfi_detach_ifgroup(ifgl->ifgl_group);
1004 #endif
1005 		kfree(ifgl->ifgl_group, M_TEMP);
1006 	}
1007 
1008 	kfree(ifgl, M_TEMP);
1009 
1010 #if NPF > 0
1011 	pfi_group_change(groupname);
1012 #endif
1013 
1014 	return (0);
1015 }
1016 
1017 /*
1018  * Stores all groups from an interface in memory pointed
1019  * to by data
1020  */
1021 int
1022 if_getgroup(caddr_t data, struct ifnet *ifp)
1023 {
1024 	int			 len, error;
1025 	struct ifg_list		*ifgl;
1026 	struct ifg_req		 ifgrq, *ifgp;
1027 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
1028 
1029 	if (ifgr->ifgr_len == 0) {
1030 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1031 			ifgr->ifgr_len += sizeof(struct ifg_req);
1032 		return (0);
1033 	}
1034 
1035 	len = ifgr->ifgr_len;
1036 	ifgp = ifgr->ifgr_groups;
1037 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1038 		if (len < sizeof(ifgrq))
1039 			return (EINVAL);
1040 		bzero(&ifgrq, sizeof ifgrq);
1041 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1042 		    sizeof(ifgrq.ifgrq_group));
1043 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1044 		    sizeof(struct ifg_req))))
1045 			return (error);
1046 		len -= sizeof(ifgrq);
1047 		ifgp++;
1048 	}
1049 
1050 	return (0);
1051 }
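
/*
 * NOTE: this follows the usual two-pass list convention: a caller
 * first passes ifgr_len == 0 to learn the space required, allocates
 * an ifg_req array, and calls again to fill it (via the SIOCGIFGROUP
 * ioctl, assuming the name from this code's OpenBSD lineage).
 */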
1052 
1053 /*
1054  * Stores all members of a group in memory pointed to by data
1055  */
1056 int
1057 if_getgroupmembers(caddr_t data)
1058 {
1059 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
1060 	struct ifg_group	*ifg;
1061 	struct ifg_member	*ifgm;
1062 	struct ifg_req		 ifgrq, *ifgp;
1063 	int			 len, error;
1064 
1065 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
1066 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1067 			break;
1068 	if (ifg == NULL)
1069 		return (ENOENT);
1070 
1071 	if (ifgr->ifgr_len == 0) {
1072 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1073 			ifgr->ifgr_len += sizeof(ifgrq);
1074 		return (0);
1075 	}
1076 
1077 	len = ifgr->ifgr_len;
1078 	ifgp = ifgr->ifgr_groups;
1079 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1080 		if (len < sizeof(ifgrq))
1081 			return (EINVAL);
1082 		bzero(&ifgrq, sizeof ifgrq);
1083 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1084 		    sizeof(ifgrq.ifgrq_member));
1085 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1086 		    sizeof(struct ifg_req))))
1087 			return (error);
1088 		len -= sizeof(ifgrq);
1089 		ifgp++;
1090 	}
1091 
1092 	return (0);
1093 }
1094 
1095 /*
1096  * Delete Routes for a Network Interface
1097  *
1098  * Called for each routing entry via the rnh->rnh_walktree() call above
1099  * to delete all route entries referencing a detaching network interface.
1100  *
1101  * Arguments:
1102  *	rn	pointer to node in the routing table
1103  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
1104  *
1105  * Returns:
1106  *	0	successful
1107  *	errno	failed - reason indicated
1108  *
1109  */
1110 static int
1111 if_rtdel(struct radix_node *rn, void *arg)
1112 {
1113 	struct rtentry	*rt = (struct rtentry *)rn;
1114 	struct ifnet	*ifp = arg;
1115 	int		err;
1116 
1117 	if (rt->rt_ifp == ifp) {
1118 
1119 		/*
1120 		 * Protect (sorta) against walktree recursion problems
1121 		 * with cloned routes
1122 		 */
1123 		if (!(rt->rt_flags & RTF_UP))
1124 			return (0);
1125 
1126 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1127 				rt_mask(rt), rt->rt_flags,
1128 				NULL);
1129 		if (err) {
1130 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
1131 		}
1132 	}
1133 
1134 	return (0);
1135 }
1136 
1137 /*
1138  * Locate an interface based on a complete address.
1139  */
1140 struct ifaddr *
1141 ifa_ifwithaddr(struct sockaddr *addr)
1142 {
1143 	struct ifnet *ifp;
1144 
1145 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1146 		struct ifaddr_container *ifac;
1147 
1148 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1149 			struct ifaddr *ifa = ifac->ifa;
1150 
1151 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1152 				continue;
1153 			if (sa_equal(addr, ifa->ifa_addr))
1154 				return (ifa);
1155 			if ((ifp->if_flags & IFF_BROADCAST) &&
1156 			    ifa->ifa_broadaddr &&
1157 			    /* IPv6 doesn't have broadcast */
1158 			    ifa->ifa_broadaddr->sa_len != 0 &&
1159 			    sa_equal(ifa->ifa_broadaddr, addr))
1160 				return (ifa);
1161 		}
1162 	}
1163 	return (NULL);
1164 }
1165 /*
1166  * Locate the point to point interface with a given destination address.
1167  */
1168 struct ifaddr *
1169 ifa_ifwithdstaddr(struct sockaddr *addr)
1170 {
1171 	struct ifnet *ifp;
1172 
1173 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1174 		struct ifaddr_container *ifac;
1175 
1176 		if (!(ifp->if_flags & IFF_POINTOPOINT))
1177 			continue;
1178 
1179 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1180 			struct ifaddr *ifa = ifac->ifa;
1181 
1182 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1183 				continue;
1184 			if (ifa->ifa_dstaddr &&
1185 			    sa_equal(addr, ifa->ifa_dstaddr))
1186 				return (ifa);
1187 		}
1188 	}
1189 	return (NULL);
1190 }
1191 
1192 /*
1193  * Find an interface on a specific network.  If several match,
1194  * the most specific one found is chosen.
1195  */
1196 struct ifaddr *
1197 ifa_ifwithnet(struct sockaddr *addr)
1198 {
1199 	struct ifnet *ifp;
1200 	struct ifaddr *ifa_maybe = NULL;
1201 	u_int af = addr->sa_family;
1202 	char *addr_data = addr->sa_data, *cplim;
1203 
1204 	/*
1205 	 * AF_LINK addresses can be looked up directly by their index number,
1206 	 * so do that if we can.
1207 	 */
1208 	if (af == AF_LINK) {
1209 		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1210 
1211 		if (sdl->sdl_index && sdl->sdl_index <= if_index)
1212 			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1213 	}
1214 
1215 	/*
1216 	 * Scan through each interface, looking for ones that have
1217 	 * addresses in this address family.
1218 	 */
1219 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1220 		struct ifaddr_container *ifac;
1221 
1222 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1223 			struct ifaddr *ifa = ifac->ifa;
1224 			char *cp, *cp2, *cp3;
1225 
1226 			if (ifa->ifa_addr->sa_family != af)
1227 next:				continue;
1228 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1229 				/*
1230 				 * This is a bit broken as it doesn't
1231 				 * take into account that the remote end may
1232 				 * be a single node in the network we are
1233 				 * looking for.
1234 				 * The trouble is that we don't know the
1235 				 * netmask for the remote end.
1236 				 */
1237 				if (ifa->ifa_dstaddr != NULL &&
1238 				    sa_equal(addr, ifa->ifa_dstaddr))
1239 					return (ifa);
1240 			} else {
1241 				/*
1242 				 * if we have a special address handler,
1243 				 * then use it instead of the generic one.
1244 				 */
1245 				if (ifa->ifa_claim_addr) {
1246 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1247 						return (ifa);
1248 					} else {
1249 						continue;
1250 					}
1251 				}
1252 
1253 				/*
1254 				 * Scan all the bits in the ifa's address.
1255 				 * If a bit disagrees with what we are
1256 				 * looking for, mask it with the netmask
1257 				 * to see if it really matters.
1258 				 * (A byte at a time)
1259 				 */
1260 				if (ifa->ifa_netmask == 0)
1261 					continue;
1262 				cp = addr_data;
1263 				cp2 = ifa->ifa_addr->sa_data;
1264 				cp3 = ifa->ifa_netmask->sa_data;
1265 				cplim = ifa->ifa_netmask->sa_len +
1266 					(char *)ifa->ifa_netmask;
1267 				while (cp3 < cplim)
1268 					if ((*cp++ ^ *cp2++) & *cp3++)
1269 						goto next; /* next address! */
1270 				/*
1271 				 * If the netmask of what we just found
1272 				 * is more specific than what we had before
1273 				 * (if we had one) then remember the new one
1274 				 * before continuing to search
1275 				 * for an even better one.
1276 				 */
1277 				if (ifa_maybe == NULL ||
1278 				    rn_refines((char *)ifa->ifa_netmask,
1279 					       (char *)ifa_maybe->ifa_netmask))
1280 					ifa_maybe = ifa;
1281 			}
1282 		}
1283 	}
1284 	return (ifa_maybe);
1285 }
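
/*
 * A worked example of the byte-wise match above: looking up 10.1.2.3
 * against an ifa of 10.1.0.1 with netmask 255.255.0.0, the first two
 * data bytes XOR to 0 and the remaining bytes are masked off by 0x00,
 * so the candidate matches and is remembered in ifa_maybe unless a
 * more refined (longer) netmask is found later.
 */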
1286 
1287 /*
1288  * Find an interface address specific to an interface best matching
1289  * a given address.
1290  */
1291 struct ifaddr *
1292 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1293 {
1294 	struct ifaddr_container *ifac;
1295 	char *cp, *cp2, *cp3;
1296 	char *cplim;
1297 	struct ifaddr *ifa_maybe = NULL;
1298 	u_int af = addr->sa_family;
1299 
1300 	if (af >= AF_MAX)
1301 		return (NULL);
1302 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1303 		struct ifaddr *ifa = ifac->ifa;
1304 
1305 		if (ifa->ifa_addr->sa_family != af)
1306 			continue;
1307 		if (ifa_maybe == NULL)
1308 			ifa_maybe = ifa;
1309 		if (ifa->ifa_netmask == NULL) {
1310 			if (sa_equal(addr, ifa->ifa_addr) ||
1311 			    (ifa->ifa_dstaddr != NULL &&
1312 			     sa_equal(addr, ifa->ifa_dstaddr)))
1313 				return (ifa);
1314 			continue;
1315 		}
1316 		if (ifp->if_flags & IFF_POINTOPOINT) {
1317 			if (sa_equal(addr, ifa->ifa_dstaddr))
1318 				return (ifa);
1319 		} else {
1320 			cp = addr->sa_data;
1321 			cp2 = ifa->ifa_addr->sa_data;
1322 			cp3 = ifa->ifa_netmask->sa_data;
1323 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1324 			for (; cp3 < cplim; cp3++)
1325 				if ((*cp++ ^ *cp2++) & *cp3)
1326 					break;
1327 			if (cp3 == cplim)
1328 				return (ifa);
1329 		}
1330 	}
1331 	return (ifa_maybe);
1332 }
1333 
1334 /*
1335  * Default action when installing a route with a Link Level gateway.
1336  * Lookup an appropriate real ifa to point to.
1337  * This should be moved to /sys/net/link.c eventually.
1338  */
1339 static void
1340 link_rtrequest(int cmd, struct rtentry *rt)
1341 {
1342 	struct ifaddr *ifa;
1343 	struct sockaddr *dst;
1344 	struct ifnet *ifp;
1345 
1346 	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1347 	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1348 		return;
1349 	ifa = ifaof_ifpforaddr(dst, ifp);
1350 	if (ifa != NULL) {
1351 		IFAFREE(rt->rt_ifa);
1352 		IFAREF(ifa);
1353 		rt->rt_ifa = ifa;
1354 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1355 			ifa->ifa_rtrequest(cmd, rt);
1356 	}
1357 }
1358 
1359 /*
1360  * Mark an interface down and notify protocols of
1361  * the transition.
1362  * NOTE: must be called at splnet or equivalent.
1363  */
1364 void
1365 if_unroute(struct ifnet *ifp, int flag, int fam)
1366 {
1367 	struct ifaddr_container *ifac;
1368 
1369 	ifp->if_flags &= ~flag;
1370 	getmicrotime(&ifp->if_lastchange);
1371 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1372 		struct ifaddr *ifa = ifac->ifa;
1373 
1374 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1375 			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1376 	}
1377 	ifq_purge_all(&ifp->if_snd);
1378 	rt_ifmsg(ifp);
1379 }
1380 
1381 /*
1382  * Mark an interface up and notify protocols of
1383  * the transition.
1384  * NOTE: must be called at splnet or equivalent.
1385  */
1386 void
1387 if_route(struct ifnet *ifp, int flag, int fam)
1388 {
1389 	struct ifaddr_container *ifac;
1390 
1391 	ifq_purge_all(&ifp->if_snd);
1392 	ifp->if_flags |= flag;
1393 	getmicrotime(&ifp->if_lastchange);
1394 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1395 		struct ifaddr *ifa = ifac->ifa;
1396 
1397 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1398 			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1399 	}
1400 	rt_ifmsg(ifp);
1401 #ifdef INET6
1402 	in6_if_up(ifp);
1403 #endif
1404 }
1405 
1406 /*
1407  * Mark an interface down and notify protocols of the transition.  An
1408  * interface going down is also considered to be a synchronizing event.
1409  * We must ensure that all packet processing related to the interface
1410  * has completed before we return so e.g. the caller can free the ifnet
1411  * structure that the mbufs may be referencing.
1412  *
1413  * NOTE: must be called at splnet or equivalent.
1414  */
1415 void
1416 if_down(struct ifnet *ifp)
1417 {
1418 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1419 	netmsg_service_sync();
1420 }
1421 
1422 /*
1423  * Mark an interface up and notify protocols of
1424  * the transition.
1425  * NOTE: must be called at splnet or equivalent.
1426  */
1427 void
1428 if_up(struct ifnet *ifp)
1429 {
1430 	if_route(ifp, IFF_UP, AF_UNSPEC);
1431 }
1432 
1433 /*
1434  * Process a link state change.
1435  * NOTE: must be called at splsoftnet or equivalent.
1436  */
1437 void
1438 if_link_state_change(struct ifnet *ifp)
1439 {
1440 	int link_state = ifp->if_link_state;
1441 
1442 	rt_ifmsg(ifp);
1443 	devctl_notify("IFNET", ifp->if_xname,
1444 	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1445 }
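
/*
 * A sketch of the expected driver usage (foo_* names are illustrative
 * assumptions): update if_link_state from the link/MII status
 * callback, then report the transition.
 */
#ifdef notnow
static void
foo_statchg(struct foo_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifp->if_link_state = foo_link_is_up(sc) ?
	    LINK_STATE_UP : LINK_STATE_DOWN;
	if_link_state_change(ifp);
}
#endif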
1446 
1447 /*
1448  * Handle interface watchdog timer routines.  Called
1449  * from softclock, we decrement timers (if set) and
1450  * call the appropriate interface routine on expiration.
1451  */
1452 static void
1453 if_slowtimo(void *arg)
1454 {
1455 	struct ifnet *ifp;
1456 
1457 	crit_enter();
1458 
1459 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1460 		if (if_stats_compat) {
1461 			IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
1462 			IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
1463 			IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
1464 			IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
1465 			IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
1466 			IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
1467 			IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
1468 			IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
1469 			IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
1470 			IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
1471 			IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
1472 		}
1473 
1474 		if (ifp->if_timer == 0 || --ifp->if_timer)
1475 			continue;
1476 		if (ifp->if_watchdog) {
1477 			if (ifnet_tryserialize_all(ifp)) {
1478 				(*ifp->if_watchdog)(ifp);
1479 				ifnet_deserialize_all(ifp);
1480 			} else {
1481 				/* try again next timeout */
1482 				++ifp->if_timer;
1483 			}
1484 		}
1485 	}
1486 
1487 	crit_exit();
1488 
1489 	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1490 }
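
/*
 * The classic watchdog pattern driven by if_slowtimo() above, with
 * hypothetical foo_* names: a driver arms if_timer when it hands
 * frames to the hardware and clears it on completion; if_slowtimo()
 * decrements it roughly once per second (IFNET_SLOWHZ), so
 * if_watchdog fires when TX stalls.
 */
#ifdef notnow
static void
foo_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	/* ... hand queued mbufs to the hardware ... */
	ifp->if_timer = 5;	/* expect TX completion within ~5 seconds */
}

static void
foo_watchdog(struct ifnet *ifp)
{
	if_printf(ifp, "watchdog timeout -- resetting\n");
	/* ... reset the chip, then kick transmission again ... */
	if_devstart(ifp);
}
#endif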
1491 
1492 /*
1493  * Map interface name to
1494  * interface structure pointer.
1495  */
1496 struct ifnet *
1497 ifunit(const char *name)
1498 {
1499 	struct ifnet *ifp;
1500 
1501 	/*
1502 	 * Search all the interfaces for this name/number
1503 	 */
1504 
1505 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1506 		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1507 			break;
1508 	}
1509 	return (ifp);
1510 }
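
/*
 * A trivial usage sketch; "em0" is only an example name, and the
 * ENXIO convention mirrors ifioctl() below.
 */
#ifdef notnow
	struct ifnet *ifp = ifunit("em0");

	if (ifp == NULL)
		return (ENXIO);
#endif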
1511 
1512 
1513 /*
1514  * Map interface name in a sockaddr_dl to
1515  * interface structure pointer.
1516  */
1517 struct ifnet *
1518 if_withname(struct sockaddr *sa)
1519 {
1520 	char ifname[IFNAMSIZ+1];
1521 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1522 
1523 	if (sa->sa_family != AF_LINK || sdl->sdl_nlen == 0 ||
1524 	    sdl->sdl_nlen > IFNAMSIZ)
1525 		return NULL;
1526 
1527 	/*
1528 	 * ifunit wants a null-terminated name.  It may not be null-terminated
1529 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1530 	 * and there might not be room to put the trailing null anyway, so we
1531 	 * make a local copy that we know we can null terminate safely.
1532 	 */
1533 
1534 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1535 	ifname[sdl->sdl_nlen] = '\0';
1536 	return ifunit(ifname);
1537 }
1538 
1539 
1540 /*
1541  * Interface ioctls.
1542  */
1543 int
1544 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1545 {
1546 	struct ifnet *ifp;
1547 	struct ifreq *ifr;
1548 	struct ifstat *ifs;
1549 	int error;
1550 	short oif_flags;
1551 	int new_flags;
1552 #ifdef COMPAT_43
1553 	int ocmd;
1554 #endif
1555 	size_t namelen, onamelen;
1556 	char new_name[IFNAMSIZ];
1557 	struct ifaddr *ifa;
1558 	struct sockaddr_dl *sdl;
1559 
1560 	switch (cmd) {
1561 	case SIOCGIFCONF:
1562 	case OSIOCGIFCONF:
1563 		return (ifconf(cmd, data, cred));
1564 	default:
1565 		break;
1566 	}
1567 
1568 	ifr = (struct ifreq *)data;
1569 
1570 	switch (cmd) {
1571 	case SIOCIFCREATE:
1572 	case SIOCIFCREATE2:
1573 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1574 			return (error);
1575 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1576 			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1577 	case SIOCIFDESTROY:
1578 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1579 			return (error);
1580 		return (if_clone_destroy(ifr->ifr_name));
1581 	case SIOCIFGCLONERS:
1582 		return (if_clone_list((struct if_clonereq *)data));
1583 	default:
1584 		break;
1585 	}
1586 
1587 	/*
1588 	 * Nominal per-interface ioctl: look up the ifp and obtain a
1589 	 * lock to serialize the ifconfig ioctl operation.
1590 	 */
1591 	ifp = ifunit(ifr->ifr_name);
1592 	if (ifp == NULL)
1593 		return (ENXIO);
1594 	error = 0;
1595 	mtx_lock(&ifp->if_ioctl_mtx);
1596 
1597 	switch (cmd) {
1598 	case SIOCGIFINDEX:
1599 		ifr->ifr_index = ifp->if_index;
1600 		break;
1601 
1602 	case SIOCGIFFLAGS:
1603 		ifr->ifr_flags = ifp->if_flags;
1604 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1605 		break;
1606 
1607 	case SIOCGIFCAP:
1608 		ifr->ifr_reqcap = ifp->if_capabilities;
1609 		ifr->ifr_curcap = ifp->if_capenable;
1610 		break;
1611 
1612 	case SIOCGIFMETRIC:
1613 		ifr->ifr_metric = ifp->if_metric;
1614 		break;
1615 
1616 	case SIOCGIFMTU:
1617 		ifr->ifr_mtu = ifp->if_mtu;
1618 		break;
1619 
1620 	case SIOCGIFTSOLEN:
1621 		ifr->ifr_tsolen = ifp->if_tsolen;
1622 		break;
1623 
1624 	case SIOCGIFDATA:
1625 		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1626 				sizeof(ifp->if_data));
1627 		break;
1628 
1629 	case SIOCGIFPHYS:
1630 		ifr->ifr_phys = ifp->if_physical;
1631 		break;
1632 
1633 	case SIOCGIFPOLLCPU:
1634 		ifr->ifr_pollcpu = -1;
1635 		break;
1636 
1637 	case SIOCSIFPOLLCPU:
1638 		break;
1639 
1640 	case SIOCSIFFLAGS:
1641 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1642 		if (error)
1643 			break;
1644 		new_flags = (ifr->ifr_flags & 0xffff) |
1645 		    (ifr->ifr_flagshigh << 16);
1646 		if (ifp->if_flags & IFF_SMART) {
1647 			/* Smart drivers twiddle their own routes */
1648 		} else if (ifp->if_flags & IFF_UP &&
1649 		    (new_flags & IFF_UP) == 0) {
1650 			crit_enter();
1651 			if_down(ifp);
1652 			crit_exit();
1653 		} else if (new_flags & IFF_UP &&
1654 		    (ifp->if_flags & IFF_UP) == 0) {
1655 			crit_enter();
1656 			if_up(ifp);
1657 			crit_exit();
1658 		}
1659 
1660 #ifdef IFPOLL_ENABLE
1661 		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1662 			if (new_flags & IFF_NPOLLING)
1663 				ifpoll_register(ifp);
1664 			else
1665 				ifpoll_deregister(ifp);
1666 		}
1667 #endif
1668 
1669 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1670 			(new_flags &~ IFF_CANTCHANGE);
1671 		if (new_flags & IFF_PPROMISC) {
1672 			/* Permanently promiscuous mode requested */
1673 			ifp->if_flags |= IFF_PROMISC;
1674 		} else if (ifp->if_pcount == 0) {
1675 			ifp->if_flags &= ~IFF_PROMISC;
1676 		}
1677 		if (ifp->if_ioctl) {
1678 			ifnet_serialize_all(ifp);
1679 			ifp->if_ioctl(ifp, cmd, data, cred);
1680 			ifnet_deserialize_all(ifp);
1681 		}
1682 		getmicrotime(&ifp->if_lastchange);
1683 		break;
1684 
1685 	case SIOCSIFCAP:
1686 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1687 		if (error)
1688 			break;
1689 		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1690 			error = EINVAL;
1691 			break;
1692 		}
1693 		ifnet_serialize_all(ifp);
1694 		ifp->if_ioctl(ifp, cmd, data, cred);
1695 		ifnet_deserialize_all(ifp);
1696 		break;
1697 
1698 	case SIOCSIFNAME:
1699 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1700 		if (error)
1701 			break;
1702 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1703 		if (error)
1704 			break;
1705 		if (new_name[0] == '\0') {
1706 			error = EINVAL;
1707 			break;
1708 		}
1709 		if (ifunit(new_name) != NULL) {
1710 			error = EEXIST;
1711 			break;
1712 		}
1713 
1714 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1715 
1716 		/* Announce the departure of the interface. */
1717 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1718 
1719 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1720 		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1721 		/* XXX IFA_LOCK(ifa); */
1722 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1723 		namelen = strlen(new_name);
1724 		onamelen = sdl->sdl_nlen;
1725 		/*
1726 		 * Move the address if needed.  This is safe because we
1727 		 * allocate space for a name of length IFNAMSIZ when we
1728 		 * create this in if_attach().
1729 		 */
1730 		if (namelen != onamelen) {
1731 			bcopy(sdl->sdl_data + onamelen,
1732 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1733 		}
1734 		bcopy(new_name, sdl->sdl_data, namelen);
1735 		sdl->sdl_nlen = namelen;
1736 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1737 		bzero(sdl->sdl_data, onamelen);
1738 		while (namelen != 0)
1739 			sdl->sdl_data[--namelen] = 0xff;
1740 		/* XXX IFA_UNLOCK(ifa) */
1741 
1742 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1743 
1744 		/* Announce the return of the interface. */
1745 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1746 		break;
1747 
1748 	case SIOCSIFMETRIC:
1749 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1750 		if (error)
1751 			break;
1752 		ifp->if_metric = ifr->ifr_metric;
1753 		getmicrotime(&ifp->if_lastchange);
1754 		break;
1755 
1756 	case SIOCSIFPHYS:
1757 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1758 		if (error)
1759 			break;
1760 		if (ifp->if_ioctl == NULL) {
1761 			error = EOPNOTSUPP;
1762 			break;
1763 		}
1764 		ifnet_serialize_all(ifp);
1765 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1766 		ifnet_deserialize_all(ifp);
1767 		if (error == 0)
1768 			getmicrotime(&ifp->if_lastchange);
1769 		break;
1770 
1771 	case SIOCSIFMTU:
1772 	{
1773 		u_long oldmtu = ifp->if_mtu;
1774 
1775 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1776 		if (error)
1777 			break;
1778 		if (ifp->if_ioctl == NULL) {
1779 			error = EOPNOTSUPP;
1780 			break;
1781 		}
1782 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1783 			error = EINVAL;
1784 			break;
1785 		}
1786 		ifnet_serialize_all(ifp);
1787 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1788 		ifnet_deserialize_all(ifp);
1789 		if (error == 0) {
1790 			getmicrotime(&ifp->if_lastchange);
1791 			rt_ifmsg(ifp);
1792 		}
1793 		/*
1794 		 * If the link MTU changed, run the network-layer-specific procedures.
1795 		 */
1796 		if (ifp->if_mtu != oldmtu) {
1797 #ifdef INET6
1798 			nd6_setmtu(ifp);
1799 #endif
1800 		}
1801 		break;
1802 	}
1803 
1804 	case SIOCSIFTSOLEN:
1805 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1806 		if (error)
1807 			break;
1808 
1809 		/* XXX need driver supplied upper limit */
1810 		if (ifr->ifr_tsolen <= 0) {
1811 			error = EINVAL;
1812 			break;
1813 		}
1814 		ifp->if_tsolen = ifr->ifr_tsolen;
1815 		break;
1816 
1817 	case SIOCADDMULTI:
1818 	case SIOCDELMULTI:
1819 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1820 		if (error)
1821 			break;
1822 
1823 		/* Don't allow group membership on non-multicast interfaces. */
1824 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1825 			error = EOPNOTSUPP;
1826 			break;
1827 		}
1828 
1829 		/* Don't let users screw up protocols' entries. */
1830 		if (ifr->ifr_addr.sa_family != AF_LINK) {
1831 			error = EINVAL;
1832 			break;
1833 		}
1834 
1835 		if (cmd == SIOCADDMULTI) {
1836 			struct ifmultiaddr *ifma;
1837 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1838 		} else {
1839 			error = if_delmulti(ifp, &ifr->ifr_addr);
1840 		}
1841 		if (error == 0)
1842 			getmicrotime(&ifp->if_lastchange);
1843 		break;
1844 
1845 	case SIOCSIFPHYADDR:
1846 	case SIOCDIFPHYADDR:
1847 #ifdef INET6
1848 	case SIOCSIFPHYADDR_IN6:
1849 #endif
1850 	case SIOCSLIFPHYADDR:
1851 	case SIOCSIFMEDIA:
1852 	case SIOCSIFGENERIC:
1853 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1854 		if (error)
1855 			break;
1856 		if (ifp->if_ioctl == NULL) {
1857 			error = EOPNOTSUPP;
1858 			break;
1859 		}
1860 		ifnet_serialize_all(ifp);
1861 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1862 		ifnet_deserialize_all(ifp);
1863 		if (error == 0)
1864 			getmicrotime(&ifp->if_lastchange);
1865 		break;
1866 
1867 	case SIOCGIFSTATUS:
1868 		ifs = (struct ifstat *)data;
1869 		ifs->ascii[0] = '\0';
1870 		/* fall through */
1871 	case SIOCGIFPSRCADDR:
1872 	case SIOCGIFPDSTADDR:
1873 	case SIOCGLIFPHYADDR:
1874 	case SIOCGIFMEDIA:
1875 	case SIOCGIFGENERIC:
1876 		if (ifp->if_ioctl == NULL) {
1877 			error = EOPNOTSUPP;
1878 			break;
1879 		}
1880 		ifnet_serialize_all(ifp);
1881 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1882 		ifnet_deserialize_all(ifp);
1883 		break;
1884 
1885 	case SIOCSIFLLADDR:
1886 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1887 		if (error)
1888 			break;
1889 		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1890 				     ifr->ifr_addr.sa_len);
1891 		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1892 		break;
1893 
1894 	default:
1895 		oif_flags = ifp->if_flags;
1896 		if (so->so_proto == 0) {
1897 			error = EOPNOTSUPP;
1898 			break;
1899 		}
1900 #ifndef COMPAT_43
1901 		error = so_pru_control_direct(so, cmd, data, ifp);
1902 #else
1903 		ocmd = cmd;
1904 
1905 		switch (cmd) {
1906 		case SIOCSIFDSTADDR:
1907 		case SIOCSIFADDR:
1908 		case SIOCSIFBRDADDR:
1909 		case SIOCSIFNETMASK:
1910 #if BYTE_ORDER != BIG_ENDIAN
1911 			if (ifr->ifr_addr.sa_family == 0 &&
1912 			    ifr->ifr_addr.sa_len < 16) {
1913 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1914 				ifr->ifr_addr.sa_len = 16;
1915 			}
1916 #else
1917 			if (ifr->ifr_addr.sa_len == 0)
1918 				ifr->ifr_addr.sa_len = 16;
1919 #endif
1920 			break;
1921 		case OSIOCGIFADDR:
1922 			cmd = SIOCGIFADDR;
1923 			break;
1924 		case OSIOCGIFDSTADDR:
1925 			cmd = SIOCGIFDSTADDR;
1926 			break;
1927 		case OSIOCGIFBRDADDR:
1928 			cmd = SIOCGIFBRDADDR;
1929 			break;
1930 		case OSIOCGIFNETMASK:
1931 			cmd = SIOCGIFNETMASK;
1932 			break;
1933 		default:
1934 			break;
1935 		}
1936 
1937 		error = so_pru_control_direct(so, cmd, data, ifp);
1938 
1939 		switch (ocmd) {
1940 		case OSIOCGIFADDR:
1941 		case OSIOCGIFDSTADDR:
1942 		case OSIOCGIFBRDADDR:
1943 		case OSIOCGIFNETMASK:
1944 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1945 			break;
1946 		}
1947 #endif /* COMPAT_43 */
1948 
1949 		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1950 #ifdef INET6
1951 			DELAY(100);	/* XXX: temporary workaround for fxp issue */
1952 			if (ifp->if_flags & IFF_UP) {
1953 				crit_enter();
1954 				in6_if_up(ifp);
1955 				crit_exit();
1956 			}
1957 #endif
1958 		}
1959 		break;
1960 	}
1961 
1962 	mtx_unlock(&ifp->if_ioctl_mtx);
1963 	return (error);
1964 }
1965 
1966 /*
1967  * Set/clear promiscuous mode on interface ifp based on the truth value
1968  * of pswitch.  The calls are reference counted so that only the first
1969  * "on" request actually has an effect, as does the final "off" request.
1970  * Results are undefined if the "off" and "on" requests are not matched.
1971  */
1972 int
1973 ifpromisc(struct ifnet *ifp, int pswitch)
1974 {
1975 	struct ifreq ifr;
1976 	int error;
1977 	int oldflags;
1978 
1979 	oldflags = ifp->if_flags;
1980 	if (ifp->if_flags & IFF_PPROMISC) {
1981 		/* Do nothing if device is in permanently promiscuous mode */
1982 		ifp->if_pcount += pswitch ? 1 : -1;
1983 		return (0);
1984 	}
1985 	if (pswitch) {
1986 		/*
1987 		 * If the device is not configured up, we cannot put it in
1988 		 * promiscuous mode.
1989 		 */
1990 		if ((ifp->if_flags & IFF_UP) == 0)
1991 			return (ENETDOWN);
1992 		if (ifp->if_pcount++ != 0)
1993 			return (0);
1994 		ifp->if_flags |= IFF_PROMISC;
1995 		log(LOG_INFO, "%s: promiscuous mode enabled\n",
1996 		    ifp->if_xname);
1997 	} else {
1998 		if (--ifp->if_pcount > 0)
1999 			return (0);
2000 		ifp->if_flags &= ~IFF_PROMISC;
2001 		log(LOG_INFO, "%s: promiscuous mode disabled\n",
2002 		    ifp->if_xname);
2003 	}
2004 	ifr.ifr_flags = ifp->if_flags;
2005 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
2006 	ifnet_serialize_all(ifp);
2007 	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
2008 	ifnet_deserialize_all(ifp);
2009 	if (error == 0)
2010 		rt_ifmsg(ifp);
2011 	else
2012 		ifp->if_flags = oldflags;
2013 	return error;
2014 }
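/*
 * Illustrative sketch (not part of the build): a hypothetical consumer,
 * e.g. a packet-tap attach/detach path, pairing ifpromisc() requests.
 * tap_attach()/tap_detach() are invented names for illustration only.
 */
#if 0
static int
tap_attach(struct ifnet *ifp)
{
	/* First "on" request sets IFF_PROMISC; later ones just refcount. */
	return (ifpromisc(ifp, 1));
}

static void
tap_detach(struct ifnet *ifp)
{
	/* Must match the attach above 1:1, or the refcount is corrupted. */
	ifpromisc(ifp, 0);
}
#endif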
2015 
2016 /*
2017  * Return the interface configuration
2018  * of the system.  The list may be used
2019  * in later ioctls (above) to get
2020  * other information.
2021  */
2022 static int
2023 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
2024 {
2025 	struct ifconf *ifc = (struct ifconf *)data;
2026 	struct ifnet *ifp;
2027 	struct sockaddr *sa;
2028 	struct ifreq ifr, *ifrp;
2029 	int space = ifc->ifc_len, error = 0;
2030 
2031 	ifrp = ifc->ifc_req;
2032 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
2033 		struct ifaddr_container *ifac;
2034 		int addrs;
2035 
2036 		if (space <= sizeof ifr)
2037 			break;
2038 
2039 		/*
2040 		 * Zero the stack declared structure first to prevent
2041 		 * memory disclosure.
2042 		 */
2043 		bzero(&ifr, sizeof(ifr));
2044 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
2045 		    >= sizeof(ifr.ifr_name)) {
2046 			error = ENAMETOOLONG;
2047 			break;
2048 		}
2049 
2050 		addrs = 0;
2051 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2052 			struct ifaddr *ifa = ifac->ifa;
2053 
2054 			if (space <= sizeof ifr)
2055 				break;
2056 			sa = ifa->ifa_addr;
2057 			if (cred->cr_prison &&
2058 			    prison_if(cred, sa))
2059 				continue;
2060 			addrs++;
2061 #ifdef COMPAT_43
2062 			if (cmd == OSIOCGIFCONF) {
2063 				struct osockaddr *osa =
2064 					 (struct osockaddr *)&ifr.ifr_addr;
2065 				ifr.ifr_addr = *sa;
2066 				osa->sa_family = sa->sa_family;
2067 				error = copyout(&ifr, ifrp, sizeof ifr);
2068 				ifrp++;
2069 			} else
2070 #endif
2071 			if (sa->sa_len <= sizeof(*sa)) {
2072 				ifr.ifr_addr = *sa;
2073 				error = copyout(&ifr, ifrp, sizeof ifr);
2074 				ifrp++;
2075 			} else {
2076 				if (space < (sizeof ifr) + sa->sa_len -
2077 					    sizeof(*sa))
2078 					break;
2079 				space -= sa->sa_len - sizeof(*sa);
2080 				error = copyout(&ifr, ifrp,
2081 						sizeof ifr.ifr_name);
2082 				if (error == 0)
2083 					error = copyout(sa, &ifrp->ifr_addr,
2084 							sa->sa_len);
2085 				ifrp = (struct ifreq *)
2086 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
2087 			}
2088 			if (error)
2089 				break;
2090 			space -= sizeof ifr;
2091 		}
2092 		if (error)
2093 			break;
2094 		if (!addrs) {
2095 			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
2096 			error = copyout(&ifr, ifrp, sizeof ifr);
2097 			if (error)
2098 				break;
2099 			space -= sizeof ifr;
2100 			ifrp++;
2101 		}
2102 	}
2103 	ifc->ifc_len -= space;
2104 	return (error);
2105 }
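/*
 * Minimal userland sketch of the consumer side of ifconf() above:
 * SIOCGIFCONF fills the buffer with packed struct ifreq records.  The
 * walk must be sa_len-aware, because addresses longer than struct
 * sockaddr are packed inline (see the copyout logic above).
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[8192], *p;
	struct ifconf ifc;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (1);
	ifc.ifc_len = sizeof(buf);
	ifc.ifc_buf = buf;
	if (ioctl(s, SIOCGIFCONF, &ifc) < 0)
		return (1);
	for (p = buf; p < buf + ifc.ifc_len; ) {
		struct ifreq *ifr = (struct ifreq *)p;
		size_t salen = ifr->ifr_addr.sa_len;

		printf("%s: af %d\n", ifr->ifr_name, ifr->ifr_addr.sa_family);
		/* Record is the name plus the (possibly oversized) address. */
		if (salen < sizeof(struct sockaddr))
			salen = sizeof(struct sockaddr);
		p += sizeof(ifr->ifr_name) + salen;
	}
	close(s);
	return (0);
}
#endif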
2106 
2107 /*
2108  * Just like ifpromisc(), but for all-multicast-reception mode.
2109  */
2110 int
2111 if_allmulti(struct ifnet *ifp, int onswitch)
2112 {
2113 	int error = 0;
2114 	struct ifreq ifr;
2115 
2116 	crit_enter();
2117 
2118 	if (onswitch) {
2119 		if (ifp->if_amcount++ == 0) {
2120 			ifp->if_flags |= IFF_ALLMULTI;
2121 			ifr.ifr_flags = ifp->if_flags;
2122 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2123 			ifnet_serialize_all(ifp);
2124 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2125 					      NULL);
2126 			ifnet_deserialize_all(ifp);
2127 		}
2128 	} else {
2129 		if (ifp->if_amcount > 1) {
2130 			ifp->if_amcount--;
2131 		} else {
2132 			ifp->if_amcount = 0;
2133 			ifp->if_flags &= ~IFF_ALLMULTI;
2134 			ifr.ifr_flags = ifp->if_flags;
2135 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
2136 			ifnet_serialize_all(ifp);
2137 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2138 					      NULL);
2139 			ifnet_deserialize_all(ifp);
2140 		}
2141 	}
2142 
2143 	crit_exit();
2144 
2145 	if (error == 0)
2146 		rt_ifmsg(ifp);
2147 	return error;
2148 }
2149 
2150 /*
2151  * Add a multicast listenership to the interface in question.
2152  * The link layer provides a routine (if_resolvemulti) which converts
 * a network-layer address into the corresponding link-layer address.
2153  */
2154 int
2155 if_addmulti(
2156 	struct ifnet *ifp,	/* interface to manipulate */
2157 	struct sockaddr *sa,	/* address to add */
2158 	struct ifmultiaddr **retifma)
2159 {
2160 	struct sockaddr *llsa, *dupsa;
2161 	int error;
2162 	struct ifmultiaddr *ifma;
2163 
2164 	/*
2165 	 * If the matching multicast address already exists
2166 	 * then don't add a new one, just add a reference
2167 	 */
2168 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2169 		if (sa_equal(sa, ifma->ifma_addr)) {
2170 			ifma->ifma_refcount++;
2171 			if (retifma)
2172 				*retifma = ifma;
2173 			return 0;
2174 		}
2175 	}
2176 
2177 	/*
2178 	 * Give the link layer a chance to accept/reject it, and also
2179 	 * find out which AF_LINK address this maps to, if it isn't one
2180 	 * already.
2181 	 */
2182 	if (ifp->if_resolvemulti) {
2183 		ifnet_serialize_all(ifp);
2184 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2185 		ifnet_deserialize_all(ifp);
2186 		if (error)
2187 			return error;
2188 	} else {
2189 		llsa = NULL;
2190 	}
2191 
2192 	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2193 	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2194 	bcopy(sa, dupsa, sa->sa_len);
2195 
2196 	ifma->ifma_addr = dupsa;
2197 	ifma->ifma_lladdr = llsa;
2198 	ifma->ifma_ifp = ifp;
2199 	ifma->ifma_refcount = 1;
2200 	ifma->ifma_protospec = 0;
2201 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2202 
2203 	/*
2204 	 * Some network interfaces can scan the address list at
2205 	 * interrupt time; lock them out.
2206 	 */
2207 	crit_enter();
2208 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2209 	crit_exit();
2210 	if (retifma)
2211 		*retifma = ifma;
2212 
2213 	if (llsa != NULL) {
2214 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2215 			if (sa_equal(ifma->ifma_addr, llsa))
2216 				break;
2217 		}
2218 		if (ifma) {
2219 			ifma->ifma_refcount++;
2220 		} else {
2221 			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2222 			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2223 			bcopy(llsa, dupsa, llsa->sa_len);
2224 			ifma->ifma_addr = dupsa;
2225 			ifma->ifma_ifp = ifp;
2226 			ifma->ifma_refcount = 1;
2227 			crit_enter();
2228 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2229 			crit_exit();
2230 		}
2231 	}
2232 	/*
2233 	 * We are certain we have added something, so call down to the
2234 	 * interface to let them know about it.
2235 	 */
2236 	crit_enter();
2237 	ifnet_serialize_all(ifp);
2238 	if (ifp->if_ioctl)
2239 		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2240 	ifnet_deserialize_all(ifp);
2241 	crit_exit();
2242 
2243 	return 0;
2244 }
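/*
 * Illustrative sketch: how a network-layer protocol joins a group via
 * if_addmulti(), roughly what in_addmulti() does for INET.  The
 * example_join_group() name and its arguments are illustrative.
 */
#if 0
static int
example_join_group(struct ifnet *ifp, struct in_addr group)
{
	struct sockaddr_in sin;
	struct ifmultiaddr *ifma;

	bzero(&sin, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_addr = group;
	/*
	 * if_resolvemulti maps this to a link-layer address and the
	 * driver reprograms its filter via SIOCADDMULTI.
	 */
	return (if_addmulti(ifp, (struct sockaddr *)&sin, &ifma));
}
#endif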
2245 
2246 /*
2247  * Remove a reference to a multicast address on this interface.  Yell
2248  * if the request does not match an existing membership.
2249  */
2250 int
2251 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2252 {
2253 	struct ifmultiaddr *ifma;
2254 
2255 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2256 		if (sa_equal(sa, ifma->ifma_addr))
2257 			break;
2258 	if (ifma == NULL)
2259 		return ENOENT;
2260 
2261 	if (ifma->ifma_refcount > 1) {
2262 		ifma->ifma_refcount--;
2263 		return 0;
2264 	}
2265 
2266 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
2267 	sa = ifma->ifma_lladdr;
2268 	crit_enter();
2269 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2270 	/*
2271 	 * Make sure the interface driver is notified
2272 	 * in the case of a link layer mcast group being left.
2273 	 */
2274 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2275 		ifnet_serialize_all(ifp);
2276 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2277 		ifnet_deserialize_all(ifp);
2278 	}
2279 	crit_exit();
2280 	kfree(ifma->ifma_addr, M_IFMADDR);
2281 	kfree(ifma, M_IFMADDR);
2282 	if (sa == NULL)
2283 		return 0;
2284 
2285 	/*
2286 	 * Now look for the link-layer address which corresponds to
2287 	 * this network address.  It had been squirreled away in
2288 	 * ifma->ifma_lladdr for this purpose (so we don't have
2289 	 * to call ifp->if_resolvemulti() again), and we saved that
2290 	 * value in sa above.  If some nasty deleted the
2291 	 * link-layer address out from underneath us, we can deal because
2292  * the address we stored is not the same as the one which was
2293 	 * in the record for the link-layer address.  (So we don't complain
2294 	 * in that case.)
2295 	 */
2296 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2297 		if (sa_equal(sa, ifma->ifma_addr))
2298 			break;
2299 	if (ifma == NULL)
2300 		return 0;
2301 
2302 	if (ifma->ifma_refcount > 1) {
2303 		ifma->ifma_refcount--;
2304 		return 0;
2305 	}
2306 
2307 	crit_enter();
2308 	ifnet_serialize_all(ifp);
2309 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2310 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2311 	ifnet_deserialize_all(ifp);
2312 	crit_exit();
2313 	kfree(ifma->ifma_addr, M_IFMADDR);
2314 	kfree(sa, M_IFMADDR);
2315 	kfree(ifma, M_IFMADDR);
2316 
2317 	return 0;
2318 }
2319 
2320 /*
2321  * Delete all multicast group memberships for an interface.
2322  * Should be used to quickly flush all multicast filters.
2323  */
2324 void
2325 if_delallmulti(struct ifnet *ifp)
2326 {
2327 	struct ifmultiaddr *ifma;
2328 	struct ifmultiaddr *next;
2329 
2330 	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2331 		if_delmulti(ifp, ifma->ifma_addr);
2332 }
2333 
2334 
2335 /*
2336  * Set the link layer address on an interface.
2337  *
2338  * At this time we only support certain types of interfaces,
2339  * and we don't allow the length of the address to change.
2340  */
2341 int
2342 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2343 {
2344 	struct sockaddr_dl *sdl;
2345 	struct ifreq ifr;
2346 
2347 	sdl = IF_LLSOCKADDR(ifp);
2348 	if (sdl == NULL)
2349 		return (EINVAL);
2350 	if (len != sdl->sdl_alen)	/* don't allow length to change */
2351 		return (EINVAL);
2352 	switch (ifp->if_type) {
2353 	case IFT_ETHER:			/* these types use struct arpcom */
2354 	case IFT_XETHER:
2355 	case IFT_L2VLAN:
2356 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2357 		bcopy(lladdr, LLADDR(sdl), len);
2358 		break;
2359 	default:
2360 		return (ENODEV);
2361 	}
2362 	/*
2363 	 * If the interface is already up, we need
2364 	 * to re-init it in order to reprogram its
2365 	 * address filter.
2366 	 */
2367 	ifnet_serialize_all(ifp);
2368 	if ((ifp->if_flags & IFF_UP) != 0) {
2369 #ifdef INET
2370 		struct ifaddr_container *ifac;
2371 #endif
2372 
2373 		ifp->if_flags &= ~IFF_UP;
2374 		ifr.ifr_flags = ifp->if_flags;
2375 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2376 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2377 			      NULL);
2378 		ifp->if_flags |= IFF_UP;
2379 		ifr.ifr_flags = ifp->if_flags;
2380 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2381 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2382 				 NULL);
2383 #ifdef INET
2384 		/*
2385 		 * Also send gratuitous ARPs to notify other nodes about
2386 		 * the address change.
2387 		 */
2388 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2389 			struct ifaddr *ifa = ifac->ifa;
2390 
2391 			if (ifa->ifa_addr != NULL &&
2392 			    ifa->ifa_addr->sa_family == AF_INET)
2393 				arp_gratuitous(ifp, ifa);
2394 		}
2395 #endif
2396 	}
2397 	ifnet_deserialize_all(ifp);
2398 	return (0);
2399 }
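/*
 * Userland sketch of the SIOCSIFLLADDR path handled above (essentially
 * what ifconfig(8) does for "lladdr").  The set_mac() name and values
 * are illustrative; the length must match the current sdl_alen, which
 * is 6 for Ethernet.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <string.h>
#include <unistd.h>

static int
set_mac(const char *ifname, const unsigned char mac[6])
{
	struct ifreq ifr;
	int s, error;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		return (-1);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	ifr.ifr_addr.sa_len = 6;
	ifr.ifr_addr.sa_family = AF_LINK;
	memcpy(ifr.ifr_addr.sa_data, mac, 6);
	error = ioctl(s, SIOCSIFLLADDR, &ifr);
	close(s);
	return (error);
}
#endif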
2400 
2401 struct ifmultiaddr *
2402 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2403 {
2404 	struct ifmultiaddr *ifma;
2405 
2406 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2407 		if (sa_equal(ifma->ifma_addr, sa))
2408 			break;
2409 
2410 	return ifma;
2411 }
2412 
2413 /*
2414  * This function locates the first real Ethernet MAC from a network
2415  * card and loads it into node, returning 0 on success or ENOENT if
2416  * no suitable interfaces were found.  It is used by the uuid code to
2417  * generate a unique 6-byte number.
2418  */
2419 int
2420 if_getanyethermac(uint16_t *node, int minlen)
2421 {
2422 	struct ifnet *ifp;
2423 	struct sockaddr_dl *sdl;
2424 
2425 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
2426 		if (ifp->if_type != IFT_ETHER)
2427 			continue;
2428 		sdl = IF_LLSOCKADDR(ifp);
2429 		if (sdl->sdl_alen < minlen)
2430 			continue;
2431 		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2432 		      minlen);
2433 		return(0);
2434 	}
2435 	return (ENOENT);
2436 }
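/*
 * Usage sketch: callers pass at least minlen bytes of storage; the
 * uuid code uses this to seed the 6-byte node field roughly as below.
 */
#if 0
	uint16_t node[3];		/* 6 bytes */

	if (if_getanyethermac(node, sizeof(node)) != 0) {
		/* no suitable NIC; fall back to a random node value */
	}
#endif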
2437 
2438 /*
2439  * The name argument must be a pointer to storage which will last as
2440  * long as the interface does.  For physical devices, the result of
2441  * device_get_name(dev) is a good choice and for pseudo-devices a
2442  * static string works well.
2443  */
2444 void
2445 if_initname(struct ifnet *ifp, const char *name, int unit)
2446 {
2447 	ifp->if_dname = name;
2448 	ifp->if_dunit = unit;
2449 	if (unit != IF_DUNIT_NONE)
2450 		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2451 	else
2452 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2453 }
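/*
 * Typical usage (sketch): physical drivers pass the newbus name/unit,
 * pseudo-devices a static string; "dev" here is the driver's device_t.
 */
#if 0
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	/* e.g. produces if_xname "emx0" */
#endif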
2454 
2455 int
2456 if_printf(struct ifnet *ifp, const char *fmt, ...)
2457 {
2458 	__va_list ap;
2459 	int retval;
2460 
2461 	retval = kprintf("%s: ", ifp->if_xname);
2462 	__va_start(ap, fmt);
2463 	retval += kvprintf(fmt, ap);
2464 	__va_end(ap);
2465 	return (retval);
2466 }
2467 
2468 struct ifnet *
2469 if_alloc(uint8_t type)
2470 {
2471 	struct ifnet *ifp;
2472 	size_t size;
2473 
2474 	/*
2475 	 * XXX temporary hack until arpcom is set up in if_l2com
2476 	 */
2477 	if (type == IFT_ETHER)
2478 		size = sizeof(struct arpcom);
2479 	else
2480 		size = sizeof(struct ifnet);
2481 
2482 	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2483 
2484 	ifp->if_type = type;
2485 
2486 	if (if_com_alloc[type] != NULL) {
2487 		ifp->if_l2com = if_com_alloc[type](type, ifp);
2488 		if (ifp->if_l2com == NULL) {
2489 			kfree(ifp, M_IFNET);
2490 			return (NULL);
2491 		}
2492 	}
2493 	return (ifp);
2494 }
2495 
2496 void
2497 if_free(struct ifnet *ifp)
2498 {
2499 	kfree(ifp, M_IFNET);
2500 }
2501 
2502 void
2503 ifq_set_classic(struct ifaltq *ifq)
2504 {
2505 	ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
2506 	    ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
2507 }
2508 
2509 void
2510 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
2511     ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
2512 {
2513 	int q;
2514 
2515 	KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
2516 	KASSERT(enqueue != NULL, ("enqueue is not specified"));
2517 	KASSERT(dequeue != NULL, ("dequeue is not specified"));
2518 	KASSERT(request != NULL, ("request is not specified"));
2519 
2520 	ifq->altq_mapsubq = mapsubq;
2521 	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
2522 		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
2523 
2524 		ifsq->ifsq_enqueue = enqueue;
2525 		ifsq->ifsq_dequeue = dequeue;
2526 		ifsq->ifsq_request = request;
2527 	}
2528 }
2529 
2530 static void
2531 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
2532 {
2533 	m->m_nextpkt = NULL;
2534 	if (ifsq->ifsq_norm_tail == NULL)
2535 		ifsq->ifsq_norm_head = m;
2536 	else
2537 		ifsq->ifsq_norm_tail->m_nextpkt = m;
2538 	ifsq->ifsq_norm_tail = m;
2539 	ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
2540 }
2541 
2542 static void
2543 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
2544 {
2545 	m->m_nextpkt = NULL;
2546 	if (ifsq->ifsq_prio_tail == NULL)
2547 		ifsq->ifsq_prio_head = m;
2548 	else
2549 		ifsq->ifsq_prio_tail->m_nextpkt = m;
2550 	ifsq->ifsq_prio_tail = m;
2551 	ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
2552 	ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len);
2553 }
2554 
2555 static struct mbuf *
2556 ifsq_norm_dequeue(struct ifaltq_subque *ifsq)
2557 {
2558 	struct mbuf *m;
2559 
2560 	m = ifsq->ifsq_norm_head;
2561 	if (m != NULL) {
2562 		if ((ifsq->ifsq_norm_head = m->m_nextpkt) == NULL)
2563 			ifsq->ifsq_norm_tail = NULL;
2564 		m->m_nextpkt = NULL;
2565 		ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
2566 	}
2567 	return m;
2568 }
2569 
2570 static struct mbuf *
2571 ifsq_prio_dequeue(struct ifaltq_subque *ifsq)
2572 {
2573 	struct mbuf *m;
2574 
2575 	m = ifsq->ifsq_prio_head;
2576 	if (m != NULL) {
2577 		if ((ifsq->ifsq_prio_head = m->m_nextpkt) == NULL)
2578 			ifsq->ifsq_prio_tail = NULL;
2579 		m->m_nextpkt = NULL;
2580 		ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
2581 		ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len);
2582 	}
2583 	return m;
2584 }
2585 
2586 int
2587 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
2588     struct altq_pktattr *pa __unused)
2589 {
2590 	M_ASSERTPKTHDR(m);
2591 	if (ifsq->ifsq_len >= ifsq->ifsq_maxlen ||
2592 	    ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) {
2593 		if ((m->m_flags & M_PRIO) &&
2594 		    ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen / 2) &&
2595 		    ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt / 2)) {
2596 			struct mbuf *m_drop;
2597 
2598 			/*
2599 			 * Perform drop-head on normal queue
2600 			 */
2601 			m_drop = ifsq_norm_dequeue(ifsq);
2602 			if (m_drop != NULL) {
2603 				m_freem(m_drop);
2604 				ifsq_prio_enqueue(ifsq, m);
2605 				return 0;
2606 			}
2607 			/* XXX nothing could be dropped? */
2608 		}
2609 		m_freem(m);
2610 		return ENOBUFS;
2611 	} else {
2612 		if (m->m_flags & M_PRIO)
2613 			ifsq_prio_enqueue(ifsq, m);
2614 		else
2615 			ifsq_norm_enqueue(ifsq, m);
2616 		return 0;
2617 	}
2618 }
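/*
 * Sketch: a sender may set M_PRIO on an mbuf so that, under congestion,
 * the enqueue above performs drop-head on the normal queue (up to half
 * of the subqueue limits) instead of dropping this packet.
 */
#if 0
	m->m_flags |= M_PRIO;
	error = ifq_dispatch(ifp, m, NULL);
#endif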
2619 
2620 struct mbuf *
2621 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op)
2622 {
2623 	struct mbuf *m;
2624 
2625 	switch (op) {
2626 	case ALTDQ_POLL:
2627 		m = ifsq->ifsq_prio_head;
2628 		if (m == NULL)
2629 			m = ifsq->ifsq_norm_head;
2630 		break;
2631 
2632 	case ALTDQ_REMOVE:
2633 		m = ifsq_prio_dequeue(ifsq);
2634 		if (m == NULL)
2635 			m = ifsq_norm_dequeue(ifsq);
2636 		break;
2637 
2638 	default:
2639 		panic("unsupported ALTQ dequeue op: %d", op);
2640 	}
2641 	return m;
2642 }
2643 
2644 int
2645 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
2646 {
2647 	switch (req) {
2648 	case ALTRQ_PURGE:
2649 		for (;;) {
2650 			struct mbuf *m;
2651 
2652 			m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE);
2653 			if (m == NULL)
2654 				break;
2655 			m_freem(m);
2656 		}
2657 		break;
2658 
2659 	default:
2660 		panic("unsupported ALTQ request: %d", req);
2661 	}
2662 	return 0;
2663 }
2664 
2665 static void
2666 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
2667 {
2668 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
2669 	int running = 0, need_sched;
2670 
2671 	/*
2672 	 * Try to do direct ifnet.if_start on the subqueue first; if there is
2673 	 * contention on the subqueue hardware serializer, ifnet.if_start on
2674 	 * the subqueue will be scheduled on the subqueue owner CPU.
2675 	 */
2676 	if (!ifsq_tryserialize_hw(ifsq)) {
2677 		/*
2678 		 * Subqueue hardware serializer contention happened,
2679 		 * ifnet.if_start on the subqueue is scheduled on
2680 		 * the subqueue owner CPU, and we keep going.
2681 		 */
2682 		ifsq_ifstart_schedule(ifsq, 1);
2683 		return;
2684 	}
2685 
2686 	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
2687 		ifp->if_start(ifp, ifsq);
2688 		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
2689 			running = 1;
2690 	}
2691 	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
2692 
2693 	ifsq_deserialize_hw(ifsq);
2694 
2695 	if (need_sched) {
2696 		/*
2697 		 * More data needs to be transmitted; ifnet.if_start on the
2698 		 * subqueue is scheduled on the subqueue owner CPU, and we
2699 		 * keep going.
2700 		 * NOTE: ifnet.if_start subqueue interlock is not released.
2701 		 */
2702 		ifsq_ifstart_schedule(ifsq, force_sched);
2703 	}
2704 }
2705 
2706 /*
2707  * Subqueue packets staging mechanism:
2708  *
2709  * The packets enqueued into the subqueue are staged until a certain amount
2710  * accumulates before ifnet.if_start is called on the subqueue.  In this
2711  * way, the driver can avoid writing to hardware registers upon every
2712  * packet; instead, hardware registers are written once a certain amount of
2713  * packets has been put onto the hardware TX ring.  Measurements on several
2714  * modern NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) show that this
2715  * hardware register write aggregation can save ~20% CPU time when 18-byte
2716  * UDP datagrams are transmitted at 1.48Mpps.  The performance improvement
2717  * from hardware register write aggregation is also mentioned in Luigi
2718  * Rizzo's netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
2719  *
2720  * Subqueue packets staging is performed for two entry points into drivers'
2721  * transmission function:
2722  * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try()
2723  * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule()
2724  *
2725  * Subqueue packets staging will be stopped upon any of the following
2726  * conditions:
2727  * - If the count of packets enqueued on the current CPU is greater than
2728  *   or equal to ifsq_stage_cntmax. (XXX this should be per-interface)
2729  * - If the total length of packets enqueued on the current CPU is greater
2730  *   than or equal to the hardware's MTU - max_protohdr.  max_protohdr is
2731  *   subtracted from the hardware's MTU mainly because a full TCP segment's
2732  *   size is usually less than the hardware's MTU.
2733  * - ifsq_ifstart_schedule() is not pending on the current CPU and
2734  *   ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
2735  *   released.
2736  * - The if_start_rollup(), which is registered as low priority netisr
2737  *   rollup function, is called; probably because no more work is pending
2738  *   for netisr.
2739  *
2740  * NOTE:
2741  * Currently subqueue packet staging is only performed in netisr threads.
2742  */
2743 int
2744 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2745 {
2746 	struct ifaltq *ifq = &ifp->if_snd;
2747 	struct ifaltq_subque *ifsq;
2748 	int error, start = 0, len, mcast = 0, avoid_start = 0;
2749 	struct ifsubq_stage_head *head = NULL;
2750 	struct ifsubq_stage *stage = NULL;
2751 	struct globaldata *gd = mycpu;
2752 	struct thread *td = gd->gd_curthread;
2753 
2754 	crit_enter_quick(td);
2755 
2756 	ifsq = ifq_map_subq(ifq, gd->gd_cpuid);
2757 	ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);
2758 
2759 	len = m->m_pkthdr.len;
2760 	if (m->m_flags & M_MCAST)
2761 		mcast = 1;
2762 
2763 	if (td->td_type == TD_TYPE_NETISR) {
2764 		head = &ifsubq_stage_heads[mycpuid];
2765 		stage = ifsq_get_stage(ifsq, mycpuid);
2766 
2767 		stage->stg_cnt++;
2768 		stage->stg_len += len;
2769 		if (stage->stg_cnt < ifsq_stage_cntmax &&
2770 		    stage->stg_len < (ifp->if_mtu - max_protohdr))
2771 			avoid_start = 1;
2772 	}
2773 
2774 	ALTQ_SQ_LOCK(ifsq);
2775 	error = ifsq_enqueue_locked(ifsq, m, pa);
2776 	if (error) {
2777 		if (!ifsq_data_ready(ifsq)) {
2778 			ALTQ_SQ_UNLOCK(ifsq);
2779 			crit_exit_quick(td);
2780 			return error;
2781 		}
2782 		avoid_start = 0;
2783 	}
2784 	if (!ifsq_is_started(ifsq)) {
2785 		if (avoid_start) {
2786 			ALTQ_SQ_UNLOCK(ifsq);
2787 
2788 			KKASSERT(!error);
2789 			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
2790 				ifsq_stage_insert(head, stage);
2791 
2792 			IFNET_STAT_INC(ifp, obytes, len);
2793 			if (mcast)
2794 				IFNET_STAT_INC(ifp, omcasts, 1);
2795 			crit_exit_quick(td);
2796 			return error;
2797 		}
2798 
2799 		/*
2800 		 * Hold the subqueue interlock of ifnet.if_start
2801 		 */
2802 		ifsq_set_started(ifsq);
2803 		start = 1;
2804 	}
2805 	ALTQ_SQ_UNLOCK(ifsq);
2806 
2807 	if (!error) {
2808 		IFNET_STAT_INC(ifp, obytes, len);
2809 		if (mcast)
2810 			IFNET_STAT_INC(ifp, omcasts, 1);
2811 	}
2812 
2813 	if (stage != NULL) {
2814 		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
2815 			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
2816 			if (!avoid_start) {
2817 				ifsq_stage_remove(head, stage);
2818 				ifsq_ifstart_schedule(ifsq, 1);
2819 			}
2820 			crit_exit_quick(td);
2821 			return error;
2822 		}
2823 
2824 		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
2825 			ifsq_stage_remove(head, stage);
2826 		} else {
2827 			stage->stg_cnt = 0;
2828 			stage->stg_len = 0;
2829 		}
2830 	}
2831 
2832 	if (!start) {
2833 		crit_exit_quick(td);
2834 		return error;
2835 	}
2836 
2837 	ifsq_ifstart_try(ifsq, 0);
2838 
2839 	crit_exit_quick(td);
2840 	return error;
2841 }
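/*
 * Sketch of the driver side that pairs with ifq_dispatch(): a typical
 * ifnet.if_start method drains its subqueue while TX descriptors last.
 * Helper names (ifsq_is_empty, ifsq_dequeue, ifsq_set_oactive) are
 * assumed from ifq_var.h; example_txring_full()/example_encap() are
 * invented for illustration.
 */
#if 0
static void
example_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct mbuf *m;

	while (!ifsq_is_empty(ifsq)) {
		if (example_txring_full(ifp)) {
			/* Stalled; cleared from the TX completion path. */
			ifsq_set_oactive(ifsq);
			break;
		}
		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			break;
		example_encap(ifp, m);	/* place onto the hardware TX ring */
	}
}
#endif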
2842 
2843 void *
2844 ifa_create(int size, int flags)
2845 {
2846 	struct ifaddr *ifa;
2847 	int i;
2848 
2849 	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2850 
2851 	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2852 	if (ifa == NULL)
2853 		return NULL;
2854 
2855 	ifa->ifa_containers =
2856 	    kmalloc_cachealign(ncpus * sizeof(struct ifaddr_container),
2857 	        M_IFADDR, M_WAITOK | M_ZERO);
2858 	ifa->ifa_ncnt = ncpus;
2859 	for (i = 0; i < ncpus; ++i) {
2860 		struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2861 
2862 		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2863 		ifac->ifa = ifa;
2864 		ifac->ifa_refcnt = 1;
2865 	}
2866 #ifdef IFADDR_DEBUG
2867 	kprintf("alloc ifa %p %d\n", ifa, size);
2868 #endif
2869 	return ifa;
2870 }
2871 
2872 void
2873 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2874 {
2875 	struct ifaddr *ifa = ifac->ifa;
2876 
2877 	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2878 	KKASSERT(ifac->ifa_refcnt == 0);
2879 	KASSERT(ifac->ifa_listmask == 0,
2880 		("ifa is still on %#x lists", ifac->ifa_listmask));
2881 
2882 	ifac->ifa_magic = IFA_CONTAINER_DEAD;
2883 
2884 #ifdef IFADDR_DEBUG_VERBOSE
2885 	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2886 #endif
2887 
2888 	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2889 		("invalid # of ifac, %d", ifa->ifa_ncnt));
2890 	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2891 #ifdef IFADDR_DEBUG
2892 		kprintf("free ifa %p\n", ifa);
2893 #endif
2894 		kfree(ifa->ifa_containers, M_IFADDR);
2895 		kfree(ifa, M_IFADDR);
2896 	}
2897 }
2898 
2899 static void
2900 ifa_iflink_dispatch(netmsg_t nmsg)
2901 {
2902 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2903 	struct ifaddr *ifa = msg->ifa;
2904 	struct ifnet *ifp = msg->ifp;
2905 	int cpu = mycpuid;
2906 	struct ifaddr_container *ifac;
2907 
2908 	crit_enter();
2909 
2910 	ifac = &ifa->ifa_containers[cpu];
2911 	ASSERT_IFAC_VALID(ifac);
2912 	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2913 		("ifaddr is on if_addrheads"));
2914 
2915 	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2916 	if (msg->tail)
2917 		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2918 	else
2919 		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2920 
2921 	crit_exit();
2922 
2923 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2924 }
2925 
2926 void
2927 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2928 {
2929 	struct netmsg_ifaddr msg;
2930 
2931 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2932 		    0, ifa_iflink_dispatch);
2933 	msg.ifa = ifa;
2934 	msg.ifp = ifp;
2935 	msg.tail = tail;
2936 
2937 	ifa_domsg(&msg.base.lmsg, 0);
2938 }
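/*
 * The dispatch/forward pair above is the general pattern for updating
 * per-CPU ifaddr state: the message starts on CPU 0, each CPU's handler
 * applies the change to its local container, and ifa_forwardmsg() passes
 * the message to cpu + 1 until the last CPU replies it.  Condensed:
 */
#if 0
static void
example_percpu_dispatch(netmsg_t nmsg)
{
	/* ... update this CPU's per-CPU state here ... */
	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
}
#endif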
2939 
2940 static void
2941 ifa_ifunlink_dispatch(netmsg_t nmsg)
2942 {
2943 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2944 	struct ifaddr *ifa = msg->ifa;
2945 	struct ifnet *ifp = msg->ifp;
2946 	int cpu = mycpuid;
2947 	struct ifaddr_container *ifac;
2948 
2949 	crit_enter();
2950 
2951 	ifac = &ifa->ifa_containers[cpu];
2952 	ASSERT_IFAC_VALID(ifac);
2953 	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2954 		("ifaddr is not on if_addrhead"));
2955 
2956 	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2957 	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2958 
2959 	crit_exit();
2960 
2961 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2962 }
2963 
2964 void
2965 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2966 {
2967 	struct netmsg_ifaddr msg;
2968 
2969 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2970 		    0, ifa_ifunlink_dispatch);
2971 	msg.ifa = ifa;
2972 	msg.ifp = ifp;
2973 
2974 	ifa_domsg(&msg.base.lmsg, 0);
2975 }
2976 
2977 static void
2978 ifa_destroy_dispatch(netmsg_t nmsg)
2979 {
2980 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2981 
2982 	IFAFREE(msg->ifa);
2983 	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2984 }
2985 
2986 void
2987 ifa_destroy(struct ifaddr *ifa)
2988 {
2989 	struct netmsg_ifaddr msg;
2990 
2991 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2992 		    0, ifa_destroy_dispatch);
2993 	msg.ifa = ifa;
2994 
2995 	ifa_domsg(&msg.base.lmsg, 0);
2996 }
2997 
2998 struct lwkt_port *
2999 ifnet_portfn(int cpu)
3000 {
3001 	return &ifnet_threads[cpu].td_msgport;
3002 }
3003 
3004 void
3005 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
3006 {
3007 	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
3008 
3009 	if (next_cpu < ncpus)
3010 		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
3011 	else
3012 		lwkt_replymsg(lmsg, 0);
3013 }
3014 
3015 int
3016 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
3017 {
3018 	KKASSERT(cpu < ncpus);
3019 	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
3020 }
3021 
3022 void
3023 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
3024 {
3025 	KKASSERT(cpu < ncpus);
3026 	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
3027 }
3028 
3029 /*
3030  * Generic netmsg service loop.  Some protocols may roll their own but all
3031  * must do the basic command dispatch function call done here.
3032  */
3033 static void
3034 ifnet_service_loop(void *arg __unused)
3035 {
3036 	netmsg_t msg;
3037 
3038 	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
3039 		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
3040 		msg->base.nm_dispatch(msg);
3041 	}
3042 }
3043 
3044 static void
3045 if_start_rollup(void)
3046 {
3047 	struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
3048 	struct ifsubq_stage *stage;
3049 
3050 	crit_enter();
3051 
3052 	while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
3053 		struct ifaltq_subque *ifsq = stage->stg_subq;
3054 		int is_sched = 0;
3055 
3056 		if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
3057 			is_sched = 1;
3058 		ifsq_stage_remove(head, stage);
3059 
3060 		if (is_sched) {
3061 			ifsq_ifstart_schedule(ifsq, 1);
3062 		} else {
3063 			int start = 0;
3064 
3065 			ALTQ_SQ_LOCK(ifsq);
3066 			if (!ifsq_is_started(ifsq)) {
3067 				/*
3068 				 * Hold the subqueue interlock of
3069 				 * ifnet.if_start
3070 				 */
3071 				ifsq_set_started(ifsq);
3072 				start = 1;
3073 			}
3074 			ALTQ_SQ_UNLOCK(ifsq);
3075 
3076 			if (start)
3077 				ifsq_ifstart_try(ifsq, 1);
3078 		}
3079 		KKASSERT((stage->stg_flags &
3080 		    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
3081 	}
3082 
3083 	crit_exit();
3084 }
3085 
3086 static void
3087 ifnetinit(void *dummy __unused)
3088 {
3089 	int i;
3090 
3091 	for (i = 0; i < ncpus; ++i) {
3092 		struct thread *thr = &ifnet_threads[i];
3093 
3094 		lwkt_create(ifnet_service_loop, NULL, NULL,
3095 			    thr, TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU,
3096 			    i, "ifnet %d", i);
3097 		netmsg_service_port_init(&thr->td_msgport);
3098 		lwkt_schedule(thr);
3099 	}
3100 
3101 	for (i = 0; i < ncpus; ++i)
3102 		TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
3103 	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
3104 }
3105 
3106 struct ifnet *
3107 ifnet_byindex(unsigned short idx)
3108 {
3109 	if (idx > if_index)
3110 		return NULL;
3111 	return ifindex2ifnet[idx];
3112 }
3113 
3114 struct ifaddr *
3115 ifaddr_byindex(unsigned short idx)
3116 {
3117 	struct ifnet *ifp;
3118 
3119 	ifp = ifnet_byindex(idx);
3120 	if (!ifp)
3121 		return NULL;
3122 	return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
3123 }
3124 
3125 void
3126 if_register_com_alloc(u_char type,
3127     if_com_alloc_t *a, if_com_free_t *f)
3128 {
3129 
3130 	KASSERT(if_com_alloc[type] == NULL,
3131 	    ("if_register_com_alloc: %d already registered", type));
3132 	KASSERT(if_com_free[type] == NULL,
3133 	    ("if_register_com_alloc: %d free already registered", type));
3134 
3135 	if_com_alloc[type] = a;
3136 	if_com_free[type] = f;
3137 }
3138 
3139 void
3140 if_deregister_com_alloc(u_char type)
3141 {
3142 
3143 	KASSERT(if_com_alloc[type] != NULL,
3144 	    ("if_deregister_com_alloc: %d not registered", type));
3145 	KASSERT(if_com_free[type] != NULL,
3146 	    ("if_deregister_com_alloc: %d free not registered", type));
3147 	if_com_alloc[type] = NULL;
3148 	if_com_free[type] = NULL;
3149 }
3150 
3151 int
3152 if_ring_count2(int cnt, int cnt_max)
3153 {
3154 	int shift = 0;
3155 
3156 	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
3157 	    ("invalid ring count max %d", cnt_max));
3158 
3159 	if (cnt <= 0)
3160 		cnt = cnt_max;
3161 	if (cnt > ncpus2)
3162 		cnt = ncpus2;
3163 	if (cnt > cnt_max)
3164 		cnt = cnt_max;
3165 
3166 	while ((1 << (shift + 1)) <= cnt)
3167 		++shift;
3168 	cnt = 1 << shift;
3169 
3170 	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
3171 	    ("calculate cnt %d, ncpus2 %d, cnt max %d",
3172 	     cnt, ncpus2, cnt_max));
3173 	return cnt;
3174 }
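/*
 * Worked example: with ncpus2 == 4, if_ring_count2(5, 8) clamps 5 down
 * to ncpus2 (4), which is already a power of 2, and returns 4; and
 * if_ring_count2(0, 8) starts from cnt_max (8), clamps to ncpus2, and
 * also returns 4.
 */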
3175 
3176 void
3177 ifq_set_maxlen(struct ifaltq *ifq, int len)
3178 {
3179 	ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
3180 }
3181 
3182 int
3183 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
3184 {
3185 	return ALTQ_SUBQ_INDEX_DEFAULT;
3186 }
3187 
3188 int
3189 ifq_mapsubq_mask(struct ifaltq *ifq, int cpuid)
3190 {
3191 	return (cpuid & ifq->altq_subq_mask);
3192 }
3193 
3194 static void
3195 ifsq_watchdog(void *arg)
3196 {
3197 	struct ifsubq_watchdog *wd = arg;
3198 	struct ifnet *ifp;
3199 
3200 	if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer))
3201 		goto done;
3202 
3203 	ifp = ifsq_get_ifp(wd->wd_subq);
3204 	if (ifnet_tryserialize_all(ifp)) {
3205 		wd->wd_watchdog(wd->wd_subq);
3206 		ifnet_deserialize_all(ifp);
3207 	} else {
3208 		/* try again next timeout */
3209 		wd->wd_timer = 1;
3210 	}
3211 done:
3212 	ifsq_watchdog_reset(wd);
3213 }
3214 
3215 static void
3216 ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
3217 {
3218 	callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
3219 	    ifsq_get_cpuid(wd->wd_subq));
3220 }
3221 
3222 void
3223 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq,
3224     ifsq_watchdog_t watchdog)
3225 {
3226 	callout_init_mp(&wd->wd_callout);
3227 	wd->wd_timer = 0;
3228 	wd->wd_subq = ifsq;
3229 	wd->wd_watchdog = watchdog;
3230 }
3231 
3232 void
3233 ifsq_watchdog_start(struct ifsubq_watchdog *wd)
3234 {
3235 	wd->wd_timer = 0;
3236 	ifsq_watchdog_reset(wd);
3237 }
3238 
3239 void
3240 ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
3241 {
3242 	wd->wd_timer = 0;
3243 	callout_stop(&wd->wd_callout);
3244 }
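/*
 * Typical driver usage of the watchdog helpers above (sketch): init at
 * attach, start/stop with the interface, and arm wd_timer when frames
 * are handed to hardware.  The callout fires once per second (hz), so
 * wd_timer = 5 expires after ~5 seconds of TX inactivity.  sc/example_*
 * names are invented for illustration.
 */
#if 0
	/* attach */
	ifsq_watchdog_init(&sc->tx_watchdog, ifsq, example_watchdog);

	/* if_init */
	ifsq_watchdog_start(&sc->tx_watchdog);

	/* when a frame is queued to the hardware */
	sc->tx_watchdog.wd_timer = 5;

	/* if_stop */
	ifsq_watchdog_stop(&sc->tx_watchdog);
#endif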
3245