xref: /openbsd/sys/net/if.c (revision 09467b48)
1 /*	$OpenBSD: if.c,v 1.616 2020/07/24 18:17:14 mvs Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "if_wg.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 #include <sys/stdint.h>	/* uintptr_t */
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 int	if_createrdomain(int, struct ifnet *);
138 int	if_setrdomain(struct ifnet *, int);
139 void	if_slowtimo(void *);
140 
141 void	if_detached_qstart(struct ifqueue *);
142 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
143 
144 int	ifioctl_get(u_long, caddr_t);
145 int	ifconf(caddr_t);
146 static int
147 	if_sffpage_check(const caddr_t);
148 
149 int	if_getgroup(caddr_t, struct ifnet *);
150 int	if_getgroupmembers(caddr_t);
151 int	if_getgroupattribs(caddr_t);
152 int	if_setgroupattribs(caddr_t);
153 int	if_getgrouplist(caddr_t);
154 
155 void	if_linkstate(struct ifnet *);
156 void	if_linkstate_task(void *);
157 
158 int	if_clone_list(struct if_clonereq *);
159 struct if_clone	*if_clone_lookup(const char *, int *);
160 
161 int	if_group_egress_build(void);
162 
163 void	if_watchdog_task(void *);
164 
165 void	if_netisr(void *);
166 
167 #ifdef DDB
168 void	ifa_print_all(void);
169 #endif
170 
171 void	if_qstart_compat(struct ifqueue *);
172 
173 /*
174  * interface index map
175  *
176  * the kernel maintains a mapping of interface indexes to struct ifnet
177  * pointers.
178  *
179  * the map is an array of struct ifnet pointers prefixed by an if_map
180  * structure. the if_map structure stores the length of its array.
181  *
182  * as interfaces are attached to the system, the map is grown on demand
183  * up to USHRT_MAX entries.
184  *
185  * interface index 0 is reserved and represents no interface. this
186  * supports the use of the interface index as the scope for IPv6 link
187  * local addresses, where scope 0 means no scope has been specified.
188  * it also supports the use of interface index as the unique identifier
189  * for network interfaces in SNMP applications as per RFC2863. therefore
190  * if_get(0) returns NULL.
191  */
192 
193 void if_ifp_dtor(void *, void *);
194 void if_map_dtor(void *, void *);
195 struct ifnet *if_ref(struct ifnet *);
196 
197 /*
198  * struct if_map
199  *
200  * bounded array of ifnet srp pointers used to fetch references of live
201  * interfaces with if_get().
202  */
203 
struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};
208 
209 /*
210  * struct if_idxmap
211  *
212  * infrastructure to manage updates and accesses to the current if_map.
213  */
214 
struct if_idxmap {
	unsigned int		 serial;	/* next index to try; 0 is reserved */
	unsigned int		 count;		/* attached interfaces; capped at USHRT_MAX */
	struct srp		 map;		/* srp pointer to the current if_map */
};
220 
221 void	if_idxmap_init(unsigned int);
222 void	if_idxmap_insert(struct ifnet *);
223 void	if_idxmap_remove(struct ifnet *);
224 
225 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
226 
227 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
228 int if_cloners_count;
229 
230 /* hooks should only be added, deleted, and run from a process context */
231 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
232 void	if_hooks_run(struct task_list *);
233 
234 int	ifq_congestion;
235 
236 int		 netisr;
237 
238 #define	NET_TASKQ	1
239 struct taskq	*nettqmp[NET_TASKQ];
240 
241 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
242 
243 /*
244  * Serialize socket operations to ensure no new sleeping points
245  * are introduced in IP output paths.
246  */
247 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
248 
249 /*
250  * Network interface utility routines.
251  */
/*
 * Initialise global network stack state: the interface index map and
 * the "softnet" task queues that drive network processing.
 */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8);

	for (i = 0; i < NET_TASKQ; i++) {
		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
		if (nettqmp[i] == NULL)
			panic("unable to create network taskq %d", i);
	}
}
269 
/* the global index map; modifications are serialised by the kernel lock */
static struct if_idxmap if_idxmap = {
	0,	/* serial; set to 1 by if_idxmap_init() to reserve index 0 */
	0,	/* count */
	SRP_INITIALIZER()
};
275 
276 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
277 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
278 
279 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
280 
/*
 * Allocate and publish the initial interface index map with `limit'
 * slots.  Called once from ifinit() before any interface attaches.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	/* the slot array lives directly after the if_map header */
	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
301 
/*
 * Allocate an interface index for `ifp' and publish the interface in
 * the index map so if_get() can find it.  The map is grown (doubled)
 * on demand when the next index falls outside the current array.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	/* the map will take its own reference on ifp below */
	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* grow: copy the live entries into a map twice the size */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new map holds its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/*
		 * publish the new map; the old one (and the references
		 * it held) is released through if_map_gc once all
		 * readers are done with it.
		 */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index, never handing out index 0 */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
365 
/*
 * Remove `ifp' from the index map and sleep until every other
 * reference to the interface has been released.  On return if_get()
 * can no longer reach ifp.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot drops the map's reference via if_ifp_gc */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
391 
/*
 * srp_gc destructor for an ifnet slot in the index map: drop the
 * reference the map held on the interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
397 
/*
 * srp_gc destructor for a retired if_map: release the references the
 * map held on every interface, then free the map itself.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * don't need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
414 
/*
 * Attach an interface to the list of "active" interfaces: hook it up
 * to the group, domain and pf machinery, assign it an interface index
 * and announce its arrival on the routing socket.  Called with the
 * net lock held from if_attach() and if_attachhead().
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the watchdog timer; if_slowtimo reschedules itself */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/*
	 * hand the tasks the index, not the pointer, so they can
	 * safely look the interface up again when they eventually run.
	 */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
449 
/*
 * Allocate the link level name for the specified interface.  This
 * is an attachment helper.  It must be called after ifp->if_addrlen
 * is initialized, which may not be the case when if_attach() is
 * called.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to a multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
487 
488 /*
489  * Free the link level name for the specified interface.  This is
490  * a detach helper.  This is called from if_detach() or from
491  * link layer type specific detach functions.
492  */
493 void
494 if_free_sadl(struct ifnet *ifp)
495 {
496 	if (ifp->if_sadl == NULL)
497 		return;
498 
499 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
500 	ifp->if_sadl = NULL;
501 }
502 
/*
 * Give every configured domain (address family) a chance to attach
 * per-interface state, stored in ifp->if_afdata[].
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
521 
/*
 * Attach `ifp' at the head of the global interface list.  Otherwise
 * identical to if_attach().
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	KERNEL_ASSERT_LOCKED();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	NET_LOCK();
	if_attachsetup(ifp);
	NET_UNLOCK();
}
532 
/*
 * Attach `ifp' at the tail of the global interface list and finish
 * setting it up under the net lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	KERNEL_ASSERT_LOCKED();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	NET_LOCK();
	if_attachsetup(ifp);
	NET_UNLOCK();
}
543 
544 void
545 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
546 {
547 	struct ifqueue **map;
548 	struct ifqueue *ifq;
549 	int i;
550 
551 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
552 	KASSERT(nqs != 0);
553 
554 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
555 
556 	ifp->if_snd.ifq_softc = NULL;
557 	map[0] = &ifp->if_snd;
558 
559 	for (i = 1; i < nqs; i++) {
560 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
561 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
562 		ifq_init(ifq, ifp, i);
563 		map[i] = ifq;
564 	}
565 
566 	ifp->if_ifqs = map;
567 	ifp->if_nifqs = nqs;
568 }
569 
570 void
571 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
572 {
573 	struct ifiqueue **map;
574 	struct ifiqueue *ifiq;
575 	unsigned int i;
576 
577 	KASSERT(niqs != 0);
578 
579 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
580 
581 	ifp->if_rcv.ifiq_softc = NULL;
582 	map[0] = &ifp->if_rcv;
583 
584 	for (i = 1; i < niqs; i++) {
585 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
586 		ifiq_init(ifiq, ifp, i);
587 		map[i] = ifiq;
588 	}
589 
590 	ifp->if_iqs = map;
591 	ifp->if_niqs = niqs;
592 }
593 
/*
 * Initialisation shared by if_attach() and if_attachhead(): address
 * lists, single default transmit/receive queues and fallback handlers.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		/* legacy drivers provide if_start; adapt it to ifqueues */
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* default to a single transmit queue, the built-in if_snd */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* likewise a single receive queue, the built-in if_rcv */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* install harmless defaults for optional handlers */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;
}
637 
/*
 * Switch the queueing discipline (ifq_ops) on an interface's send side.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
652 
/*
 * Kick the transmit side of a legacy (non-MPSAFE) interface.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * qstart routine installed by if_attach_common() for drivers that are
 * not marked IFXF_MPSAFE.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy start routines run under the kernel lock at splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
680 
/*
 * Hand a fully formed packet to `ifp' for transmission, honouring pf
 * delays and bridge membership first.  Returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#if NPF > 0
	/* packets tagged by pf with a delay take a detour */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* output of bridge members is diverted through the bridge */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
704 
705 int
706 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
707 {
708 	struct ifqueue *ifq = &ifp->if_snd;
709 	int error;
710 
711 	if (ifp->if_nifqs > 1) {
712 		unsigned int idx;
713 
714 		/*
715 		 * use the operations on the first ifq to pick which of
716 		 * the array gets this mbuf.
717 		 */
718 
719 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
720 		ifq = ifp->if_ifqs[idx];
721 	}
722 
723 	error = ifq_enqueue(ifq, m);
724 	if (error)
725 		return (error);
726 
727 	ifq_start(ifq);
728 
729 	return (0);
730 }
731 
/*
 * Queue a list of received packets on the interface's first receive
 * queue for network stack processing.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
737 
/*
 * Loop a packet back into the stack as if it had been received on
 * `ifp'.  The packet is counted against both the output and input
 * statistics of the interface.  Frees the mbuf and returns
 * EAFNOSUPPORT for address families it cannot dispatch.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* re-tag the packet as freshly received on ifp */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* the looped packet counts as both sent and received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
789 
790 int
791 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
792 {
793 	struct ifiqueue *ifiq;
794 	unsigned int flow = 0;
795 
796 	m->m_pkthdr.ph_family = af;
797 	m->m_pkthdr.ph_ifidx = ifp->if_index;
798 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
799 
800 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
801 		flow = m->m_pkthdr.ph_flowid;
802 
803 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
804 
805 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
806 }
807 
/*
 * Pass every packet on `ml' to the interface's protocol input routine
 * under the net lock, feeding some entropy into the random pool first.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* packets from cloned interfaces are not used as a random source */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
836 
/*
 * Input path for virtual interfaces: tag `m' as received on `ifp',
 * count it, offer it to bpf and hand it straight to the interface's
 * input routine, bypassing the receive queues.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return asks us to drop the packet */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	(*ifp->if_input)(ifp, m);
}
862 
/*
 * Task run on the softnet taskq to service the legacy netisr soft
 * interrupts.  Bits set in the global `netisr' word select which
 * protocol handlers get run.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced so far for pfsync below */
		t |= n;
	}

	/* pfsync is run once, after all other handlers have drained */
#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
932 
/*
 * Run every task on a hook list.  The list mutex is dropped around
 * each callback; a cursor task (recognised by its NULL t_func) is
 * inserted after the current entry so iteration stays valid across
 * concurrent list changes while the mutex is released.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };	/* NULL marks a cursor */
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		/* run the hook without holding the list mutex */
		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
961 
/*
 * Undo pseudo-driver changes by running the detach hooks registered
 * on `ifp'.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
975 
/*
 * Register a task to run when `ifp' is detached.  Hooks are inserted
 * at the head so if_hooks_run() executes them in reverse order of
 * registration (see if_deactivate()).
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
983 
/*
 * Remove a previously registered detach hook.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
991 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* neuter the interface: late callers hit harmless stubs */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	/* detach per-address-family state */
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	KERNEL_ASSERT_LOCKED();
	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* undo what if_attachdomain() set up */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* tear down the transmit queues, freeing those we allocated */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* likewise the receive queues */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1106 
1107 /*
1108  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1109  */
1110 int
1111 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1112 {
1113 	struct ifnet *ifp;
1114 	int connected = 0;
1115 
1116 	ifp = if_get(ifidx);
1117 	if (ifp == NULL)
1118 		return (0);
1119 
1120 	if (ifp0->if_index == ifp->if_index)
1121 		connected = 1;
1122 
1123 #if NBRIDGE > 0
1124 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1125 		connected = 1;
1126 #endif
1127 #if NCARP > 0
1128 	if ((ifp0->if_type == IFT_CARP &&
1129 	    ifp0->if_carpdevidx == ifp->if_index) ||
1130 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1131 		connected = 1;
1132 #endif
1133 
1134 	if_put(ifp);
1135 	return (connected);
1136 }
1137 
/*
 * Create a clone network interface.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	/* resolve `name' into a registered cloner and a unit number */
	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	if (ifunit(name) != NULL)
		return (EEXIST);

	ret = (*ifc->ifc_create)(ifc, unit);

	/* the create hook may fail, or may not have attached an ifnet */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		return (ret);

	NET_LOCK();
	/* new clones join the group named after their cloner */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();

	return (ret);
}
1168 
/*
 * Destroy a clone network interface.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* not every cloner supports destruction */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	ifp = ifunit(name);
	if (ifp == NULL)
		return (ENXIO);

	NET_LOCK();
	/* bring the interface down before taking it apart */
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	return (ret);
}
1202 
1203 /*
1204  * Look up a network interface cloner.
1205  */
1206 struct if_clone *
1207 if_clone_lookup(const char *name, int *unitp)
1208 {
1209 	struct if_clone *ifc;
1210 	const char *cp;
1211 	int unit;
1212 
1213 	/* separate interface name from unit */
1214 	for (cp = name;
1215 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1216 	    cp++)
1217 		continue;
1218 
1219 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1220 		return (NULL);	/* No name or unit number */
1221 
1222 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1223 		return (NULL);	/* unit number 0 padded */
1224 
1225 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1226 		if (strlen(ifc->ifc_name) == cp - name &&
1227 		    !strncmp(name, ifc->ifc_name, cp - name))
1228 			break;
1229 	}
1230 
1231 	if (ifc == NULL)
1232 		return (NULL);
1233 
1234 	unit = 0;
1235 	while (cp - name < IFNAMSIZ && *cp) {
1236 		if (*cp < '0' || *cp > '9' ||
1237 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1238 			/* Bogus unit number. */
1239 			return (NULL);
1240 		}
1241 		unit = (unit * 10) + (*cp++ - '0');
1242 	}
1243 
1244 	if (unitp != NULL)
1245 		*unitp = unit;
1246 	return (ifc);
1247 }
1248 
1249 /*
1250  * Register a network interface cloner.
1251  */
1252 void
1253 if_clone_attach(struct if_clone *ifc)
1254 {
1255 	/*
1256 	 * we are called at kernel boot by main(), when pseudo devices are
1257 	 * being attached. The main() is the only guy which may alter the
1258 	 * if_cloners. While system is running and main() is done with
1259 	 * initialization, the if_cloners becomes immutable.
1260 	 */
1261 	KASSERT(pdevinit_done == 0);
1262 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1263 	if_cloners_count++;
1264 }
1265 
1266 /*
1267  * Provide list of interface cloners to userspace.
1268  */
1269 int
1270 if_clone_list(struct if_clonereq *ifcr)
1271 {
1272 	char outbuf[IFNAMSIZ], *dst;
1273 	struct if_clone *ifc;
1274 	int count, error = 0;
1275 
1276 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1277 		/* Just asking how many there are. */
1278 		ifcr->ifcr_total = if_cloners_count;
1279 		return (0);
1280 	}
1281 
1282 	if (ifcr->ifcr_count < 0)
1283 		return (EINVAL);
1284 
1285 	ifcr->ifcr_total = if_cloners_count;
1286 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1287 
1288 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1289 		if (count == 0)
1290 			break;
1291 		bzero(outbuf, sizeof outbuf);
1292 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1293 		error = copyout(outbuf, dst, IFNAMSIZ);
1294 		if (error)
1295 			break;
1296 		count--;
1297 		dst += IFNAMSIZ;
1298 	}
1299 
1300 	return (error);
1301 }
1302 
1303 /*
1304  * set queue congestion marker
1305  */
1306 void
1307 if_congestion(void)
1308 {
1309 	extern int ticks;
1310 
1311 	ifq_congestion = ticks;
1312 }
1313 
/*
 * Return 1 if congestion was signalled within the last hz/100 ticks,
 * 0 otherwise.
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* Tick counter wrapped: reset the marker to "long ago". */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1328 
/* Byte-wise sockaddr comparison; uses the first argument's sa_len. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1332 
1333 /*
1334  * Locate an interface based on a complete address.
1335  */
1336 struct ifaddr *
1337 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1338 {
1339 	struct ifnet *ifp;
1340 	struct ifaddr *ifa;
1341 	u_int rdomain;
1342 
1343 	rdomain = rtable_l2(rtableid);
1344 	KERNEL_LOCK();
1345 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1346 		if (ifp->if_rdomain != rdomain)
1347 			continue;
1348 
1349 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1350 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1351 				continue;
1352 
1353 			if (equal(addr, ifa->ifa_addr)) {
1354 				KERNEL_UNLOCK();
1355 				return (ifa);
1356 			}
1357 		}
1358 	}
1359 	KERNEL_UNLOCK();
1360 	return (NULL);
1361 }
1362 
1363 /*
1364  * Locate the point to point interface with a given destination address.
1365  */
1366 struct ifaddr *
1367 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1368 {
1369 	struct ifnet *ifp;
1370 	struct ifaddr *ifa;
1371 
1372 	rdomain = rtable_l2(rdomain);
1373 	KERNEL_LOCK();
1374 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1375 		if (ifp->if_rdomain != rdomain)
1376 			continue;
1377 		if (ifp->if_flags & IFF_POINTOPOINT) {
1378 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1379 				if (ifa->ifa_addr->sa_family !=
1380 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1381 					continue;
1382 				if (equal(addr, ifa->ifa_dstaddr)) {
1383 					KERNEL_UNLOCK();
1384 					return (ifa);
1385 				}
1386 			}
1387 		}
1388 	}
1389 	KERNEL_UNLOCK();
1390 	return (NULL);
1391 }
1392 
1393 /*
1394  * Find an interface address specific to an interface best matching
1395  * a given address.
1396  */
1397 struct ifaddr *
1398 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1399 {
1400 	struct ifaddr *ifa;
1401 	char *cp, *cp2, *cp3;
1402 	char *cplim;
1403 	struct ifaddr *ifa_maybe = NULL;
1404 	u_int af = addr->sa_family;
1405 
1406 	if (af >= AF_MAX)
1407 		return (NULL);
1408 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1409 		if (ifa->ifa_addr->sa_family != af)
1410 			continue;
1411 		if (ifa_maybe == NULL)
1412 			ifa_maybe = ifa;
1413 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1414 			if (equal(addr, ifa->ifa_addr) ||
1415 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1416 				return (ifa);
1417 			continue;
1418 		}
1419 		cp = addr->sa_data;
1420 		cp2 = ifa->ifa_addr->sa_data;
1421 		cp3 = ifa->ifa_netmask->sa_data;
1422 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1423 		for (; cp3 < cplim; cp3++)
1424 			if ((*cp++ ^ *cp2++) & *cp3)
1425 				break;
1426 		if (cp3 == cplim)
1427 			return (ifa);
1428 	}
1429 	return (ifa_maybe);
1430 }
1431 
/* No-op if_rtrequest handler for interfaces with no special needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1436 
1437 /*
1438  * Default action when installing a local route on a point-to-point
1439  * interface.
1440  */
1441 void
1442 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1443 {
1444 	struct ifnet *lo0ifp;
1445 	struct ifaddr *ifa, *lo0ifa;
1446 
1447 	switch (req) {
1448 	case RTM_ADD:
1449 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1450 			break;
1451 
1452 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1453 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1454 			    rt_key(rt)->sa_len) == 0)
1455 				break;
1456 		}
1457 
1458 		if (ifa == NULL)
1459 			break;
1460 
1461 		KASSERT(ifa == rt->rt_ifa);
1462 
1463 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1464 		KASSERT(lo0ifp != NULL);
1465 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1466 			if (lo0ifa->ifa_addr->sa_family ==
1467 			    ifa->ifa_addr->sa_family)
1468 				break;
1469 		}
1470 		if_put(lo0ifp);
1471 
1472 		if (lo0ifa == NULL)
1473 			break;
1474 
1475 		rt->rt_flags &= ~RTF_LLINFO;
1476 		break;
1477 	case RTM_DELETE:
1478 	case RTM_RESOLVE:
1479 	default:
1480 		break;
1481 	}
1482 }
1483 
1484 
1485 /*
1486  * Bring down all interfaces
1487  */
1488 void
1489 if_downall(void)
1490 {
1491 	struct ifreq ifrq;	/* XXX only partly built */
1492 	struct ifnet *ifp;
1493 
1494 	NET_LOCK();
1495 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1496 		if ((ifp->if_flags & IFF_UP) == 0)
1497 			continue;
1498 		if_down(ifp);
1499 		ifrq.ifr_flags = ifp->if_flags;
1500 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1501 	}
1502 	NET_UNLOCK();
1503 }
1504 
1505 /*
1506  * Mark an interface down and notify protocols of
1507  * the transition.
1508  */
1509 void
1510 if_down(struct ifnet *ifp)
1511 {
1512 	NET_ASSERT_LOCKED();
1513 
1514 	ifp->if_flags &= ~IFF_UP;
1515 	getmicrotime(&ifp->if_lastchange);
1516 	ifq_purge(&ifp->if_snd);
1517 
1518 	if_linkstate(ifp);
1519 }
1520 
1521 /*
1522  * Mark an interface up and notify protocols of
1523  * the transition.
1524  */
1525 void
1526 if_up(struct ifnet *ifp)
1527 {
1528 	NET_ASSERT_LOCKED();
1529 
1530 	ifp->if_flags |= IFF_UP;
1531 	getmicrotime(&ifp->if_lastchange);
1532 
1533 #ifdef INET6
1534 	/* Userland expects the kernel to set ::1 on default lo(4). */
1535 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1536 		in6_ifattach(ifp);
1537 #endif
1538 
1539 	if_linkstate(ifp);
1540 }
1541 
1542 /*
1543  * Notify userland, the routing table and hooks owner of
1544  * a link-state transition.
1545  */
1546 void
1547 if_linkstate_task(void *xifidx)
1548 {
1549 	unsigned int ifidx = (unsigned long)xifidx;
1550 	struct ifnet *ifp;
1551 
1552 	KERNEL_LOCK();
1553 	NET_LOCK();
1554 
1555 	ifp = if_get(ifidx);
1556 	if (ifp != NULL)
1557 		if_linkstate(ifp);
1558 	if_put(ifp);
1559 
1560 	NET_UNLOCK();
1561 	KERNEL_UNLOCK();
1562 }
1563 
/*
 * Propagate a link-state change: inform routing-socket listeners,
 * let the routing table react, then run registered hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);

	if_hooks_run(&ifp->if_linkstatehooks);
}
1574 
/* Register a task to run on link-state changes of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1582 
/* Remove a task registered with if_linkstatehook_add(). */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1590 
1591 /*
1592  * Schedule a link state change task.
1593  */
1594 void
1595 if_link_state_change(struct ifnet *ifp)
1596 {
1597 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1598 }
1599 
1600 /*
1601  * Handle interface watchdog timer routine.  Called
1602  * from softclock, we decrement timer (if set) and
1603  * call the appropriate interface routine on expiration.
1604  */
1605 void
1606 if_slowtimo(void *arg)
1607 {
1608 	struct ifnet *ifp = arg;
1609 	int s = splnet();
1610 
1611 	if (ifp->if_watchdog) {
1612 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1613 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1614 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1615 	}
1616 	splx(s);
1617 }
1618 
/*
 * Task callback invoking an interface's watchdog handler.
 *
 * The interface is passed by index; it may have disappeared before
 * the task runs, in which case this is a no-op.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1639 
1640 /*
1641  * Map interface name to interface structure pointer.
1642  */
1643 struct ifnet *
1644 ifunit(const char *name)
1645 {
1646 	struct ifnet *ifp;
1647 
1648 	KERNEL_ASSERT_LOCKED();
1649 
1650 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1651 		if (strcmp(ifp->if_xname, name) == 0)
1652 			return (ifp);
1653 	}
1654 	return (NULL);
1655 }
1656 
1657 /*
1658  * Map interface index to interface structure pointer.
1659  */
1660 struct ifnet *
1661 if_get(unsigned int index)
1662 {
1663 	struct srp_ref sr;
1664 	struct if_map *if_map;
1665 	struct srp *map;
1666 	struct ifnet *ifp = NULL;
1667 
1668 	if_map = srp_enter(&sr, &if_idxmap.map);
1669 	if (index < if_map->limit) {
1670 		map = (struct srp *)(if_map + 1);
1671 
1672 		ifp = srp_follow(&sr, &map[index]);
1673 		if (ifp != NULL) {
1674 			KASSERT(ifp->if_index == index);
1675 			if_ref(ifp);
1676 		}
1677 	}
1678 	srp_leave(&sr);
1679 
1680 	return (ifp);
1681 }
1682 
/* Take an additional reference on ``ifp''; paired with if_put(). */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1690 
/* Release a reference taken by if_get()/if_ref(); NULL is a no-op. */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1699 
/*
 * Set the link-layer address of ``ifp'' to the ETHER_ADDR_LEN bytes
 * at ``lladdr'', updating both the arpcom copy and the sockaddr_dl.
 *
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom
 * (ethernet-style interface) — callers must guarantee this.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1711 
/*
 * Create routing domain ``rdomain'': add its routing table if needed
 * and create the matching loopback interface lo<rdomain>.
 *
 * Returns EEXIST when the table already holds routes, ENXIO when the
 * loopback cannot be found after creation, or a creation error.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is acceptable when ``ifp'' itself is that loopback */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1738 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * The target table must exist and be a real rdomain.  Moving the
 * default loopback of an rdomain away is refused with EPERM.  When
 * the domain actually changes, all L3 state on the interface is torn
 * down first and the interface is cycled down/up if it was up.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* the rdomain's own loopback interface may not leave it */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1802 
1803 /*
1804  * Interface ioctls.
1805  */
1806 int
1807 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1808 {
1809 	struct ifnet *ifp;
1810 	struct ifreq *ifr = (struct ifreq *)data;
1811 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1812 	struct if_afreq *ifar = (struct if_afreq *)data;
1813 	char ifdescrbuf[IFDESCRSIZE];
1814 	char ifrtlabelbuf[RTLABEL_LEN];
1815 	int s, error = 0, oif_xflags;
1816 	size_t bytesdone;
1817 	unsigned short oif_flags;
1818 
1819 	switch (cmd) {
1820 	case SIOCIFCREATE:
1821 		if ((error = suser(p)) != 0)
1822 			return (error);
1823 		error = if_clone_create(ifr->ifr_name, 0);
1824 		return (error);
1825 	case SIOCIFDESTROY:
1826 		if ((error = suser(p)) != 0)
1827 			return (error);
1828 		error = if_clone_destroy(ifr->ifr_name);
1829 		return (error);
1830 	case SIOCSIFGATTR:
1831 		if ((error = suser(p)) != 0)
1832 			return (error);
1833 		NET_LOCK();
1834 		error = if_setgroupattribs(data);
1835 		NET_UNLOCK();
1836 		return (error);
1837 	case SIOCGIFCONF:
1838 	case SIOCIFGCLONERS:
1839 	case SIOCGIFGMEMB:
1840 	case SIOCGIFGATTR:
1841 	case SIOCGIFGLIST:
1842 	case SIOCGIFFLAGS:
1843 	case SIOCGIFXFLAGS:
1844 	case SIOCGIFMETRIC:
1845 	case SIOCGIFMTU:
1846 	case SIOCGIFHARDMTU:
1847 	case SIOCGIFDATA:
1848 	case SIOCGIFDESCR:
1849 	case SIOCGIFRTLABEL:
1850 	case SIOCGIFPRIORITY:
1851 	case SIOCGIFRDOMAIN:
1852 	case SIOCGIFGROUP:
1853 	case SIOCGIFLLPRIO:
1854 		return (ifioctl_get(cmd, data));
1855 	}
1856 
1857 	ifp = ifunit(ifr->ifr_name);
1858 	if (ifp == NULL)
1859 		return (ENXIO);
1860 	oif_flags = ifp->if_flags;
1861 	oif_xflags = ifp->if_xflags;
1862 
1863 	switch (cmd) {
1864 	case SIOCIFAFATTACH:
1865 	case SIOCIFAFDETACH:
1866 		if ((error = suser(p)) != 0)
1867 			break;
1868 		NET_LOCK();
1869 		switch (ifar->ifar_af) {
1870 		case AF_INET:
1871 			/* attach is a noop for AF_INET */
1872 			if (cmd == SIOCIFAFDETACH)
1873 				in_ifdetach(ifp);
1874 			break;
1875 #ifdef INET6
1876 		case AF_INET6:
1877 			if (cmd == SIOCIFAFATTACH)
1878 				error = in6_ifattach(ifp);
1879 			else
1880 				in6_ifdetach(ifp);
1881 			break;
1882 #endif /* INET6 */
1883 		default:
1884 			error = EAFNOSUPPORT;
1885 		}
1886 		NET_UNLOCK();
1887 		break;
1888 
1889 	case SIOCSIFFLAGS:
1890 		if ((error = suser(p)) != 0)
1891 			break;
1892 
1893 		NET_LOCK();
1894 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1895 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1896 
1897 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1898 		if (error != 0) {
1899 			ifp->if_flags = oif_flags;
1900 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1901 			s = splnet();
1902 			if (ISSET(ifp->if_flags, IFF_UP))
1903 				if_up(ifp);
1904 			else
1905 				if_down(ifp);
1906 			splx(s);
1907 		}
1908 		NET_UNLOCK();
1909 		break;
1910 
1911 	case SIOCSIFXFLAGS:
1912 		if ((error = suser(p)) != 0)
1913 			break;
1914 
1915 		NET_LOCK();
1916 #ifdef INET6
1917 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1918 			error = in6_ifattach(ifp);
1919 			if (error != 0) {
1920 				NET_UNLOCK();
1921 				break;
1922 			}
1923 		}
1924 
1925 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1926 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
1927 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1928 
1929 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1930 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
1931 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1932 
1933 #endif	/* INET6 */
1934 
1935 #ifdef MPLS
1936 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1937 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1938 			s = splnet();
1939 			ifp->if_xflags |= IFXF_MPLS;
1940 			ifp->if_ll_output = ifp->if_output;
1941 			ifp->if_output = mpls_output;
1942 			splx(s);
1943 		}
1944 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1945 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1946 			s = splnet();
1947 			ifp->if_xflags &= ~IFXF_MPLS;
1948 			ifp->if_output = ifp->if_ll_output;
1949 			ifp->if_ll_output = NULL;
1950 			splx(s);
1951 		}
1952 #endif	/* MPLS */
1953 
1954 #ifndef SMALL_KERNEL
1955 		if (ifp->if_capabilities & IFCAP_WOL) {
1956 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1957 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1958 				s = splnet();
1959 				ifp->if_xflags |= IFXF_WOL;
1960 				error = ifp->if_wol(ifp, 1);
1961 				splx(s);
1962 			}
1963 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1964 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1965 				s = splnet();
1966 				ifp->if_xflags &= ~IFXF_WOL;
1967 				error = ifp->if_wol(ifp, 0);
1968 				splx(s);
1969 			}
1970 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1971 			ifr->ifr_flags &= ~IFXF_WOL;
1972 			error = ENOTSUP;
1973 		}
1974 #endif
1975 
1976 		if (error == 0)
1977 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
1978 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
1979 		NET_UNLOCK();
1980 		break;
1981 
1982 	case SIOCSIFMETRIC:
1983 		if ((error = suser(p)) != 0)
1984 			break;
1985 		NET_LOCK();
1986 		ifp->if_metric = ifr->ifr_metric;
1987 		NET_UNLOCK();
1988 		break;
1989 
1990 	case SIOCSIFMTU:
1991 		if ((error = suser(p)) != 0)
1992 			break;
1993 		NET_LOCK();
1994 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1995 		NET_UNLOCK();
1996 		if (!error)
1997 			rtm_ifchg(ifp);
1998 		break;
1999 
2000 	case SIOCSIFDESCR:
2001 		if ((error = suser(p)) != 0)
2002 			break;
2003 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2004 		    IFDESCRSIZE, &bytesdone);
2005 		if (error == 0) {
2006 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2007 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2008 		}
2009 		break;
2010 
2011 	case SIOCSIFRTLABEL:
2012 		if ((error = suser(p)) != 0)
2013 			break;
2014 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2015 		    RTLABEL_LEN, &bytesdone);
2016 		if (error == 0) {
2017 			rtlabel_unref(ifp->if_rtlabelid);
2018 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2019 		}
2020 		break;
2021 
2022 	case SIOCSIFPRIORITY:
2023 		if ((error = suser(p)) != 0)
2024 			break;
2025 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2026 			error = EINVAL;
2027 			break;
2028 		}
2029 		ifp->if_priority = ifr->ifr_metric;
2030 		break;
2031 
2032 	case SIOCSIFRDOMAIN:
2033 		if ((error = suser(p)) != 0)
2034 			break;
2035 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2036 		if (!error || error == EEXIST) {
2037 			NET_LOCK();
2038 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2039 			NET_UNLOCK();
2040 		}
2041 		break;
2042 
2043 	case SIOCAIFGROUP:
2044 		if ((error = suser(p)))
2045 			break;
2046 		NET_LOCK();
2047 		error = if_addgroup(ifp, ifgr->ifgr_group);
2048 		if (error == 0) {
2049 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2050 			if (error == ENOTTY)
2051 				error = 0;
2052 		}
2053 		NET_UNLOCK();
2054 		break;
2055 
2056 	case SIOCDIFGROUP:
2057 		if ((error = suser(p)))
2058 			break;
2059 		NET_LOCK();
2060 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2061 		if (error == ENOTTY)
2062 			error = 0;
2063 		if (error == 0)
2064 			error = if_delgroup(ifp, ifgr->ifgr_group);
2065 		NET_UNLOCK();
2066 		break;
2067 
2068 	case SIOCSIFLLADDR:
2069 		if ((error = suser(p)))
2070 			break;
2071 		if ((ifp->if_sadl == NULL) ||
2072 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2073 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2074 			error = EINVAL;
2075 			break;
2076 		}
2077 		NET_LOCK();
2078 		switch (ifp->if_type) {
2079 		case IFT_ETHER:
2080 		case IFT_CARP:
2081 		case IFT_XETHER:
2082 		case IFT_ISO88025:
2083 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2084 			if (error == ENOTTY)
2085 				error = 0;
2086 			if (error == 0)
2087 				error = if_setlladdr(ifp,
2088 				    ifr->ifr_addr.sa_data);
2089 			break;
2090 		default:
2091 			error = ENODEV;
2092 		}
2093 
2094 		if (error == 0)
2095 			ifnewlladdr(ifp);
2096 		NET_UNLOCK();
2097 		break;
2098 
2099 	case SIOCSIFLLPRIO:
2100 		if ((error = suser(p)))
2101 			break;
2102 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2103 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2104 			error = EINVAL;
2105 			break;
2106 		}
2107 		NET_LOCK();
2108 		ifp->if_llprio = ifr->ifr_llprio;
2109 		NET_UNLOCK();
2110 		break;
2111 
2112 	case SIOCGIFSFFPAGE:
2113 		error = suser(p);
2114 		if (error != 0)
2115 			break;
2116 
2117 		error = if_sffpage_check(data);
2118 		if (error != 0)
2119 			break;
2120 
2121 		/* don't take NET_LOCK because i2c reads take a long time */
2122 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2123 		break;
2124 
2125 	case SIOCSETKALIVE:
2126 	case SIOCDIFPHYADDR:
2127 	case SIOCSLIFPHYADDR:
2128 	case SIOCSLIFPHYRTABLE:
2129 	case SIOCSLIFPHYTTL:
2130 	case SIOCSLIFPHYDF:
2131 	case SIOCSLIFPHYECN:
2132 	case SIOCADDMULTI:
2133 	case SIOCDELMULTI:
2134 	case SIOCSIFMEDIA:
2135 	case SIOCSVNETID:
2136 	case SIOCDVNETID:
2137 	case SIOCSVNETFLOWID:
2138 	case SIOCSTXHPRIO:
2139 	case SIOCSRXHPRIO:
2140 	case SIOCSIFPAIR:
2141 	case SIOCSIFPARENT:
2142 	case SIOCDIFPARENT:
2143 	case SIOCSETMPWCFG:
2144 	case SIOCSETLABEL:
2145 	case SIOCDELLABEL:
2146 	case SIOCSPWE3CTRLWORD:
2147 	case SIOCSPWE3FAT:
2148 	case SIOCSPWE3NEIGHBOR:
2149 	case SIOCDPWE3NEIGHBOR:
2150 #if NBRIDGE > 0
2151 	case SIOCBRDGADD:
2152 	case SIOCBRDGDEL:
2153 	case SIOCBRDGSIFFLGS:
2154 	case SIOCBRDGSCACHE:
2155 	case SIOCBRDGADDS:
2156 	case SIOCBRDGDELS:
2157 	case SIOCBRDGSADDR:
2158 	case SIOCBRDGSTO:
2159 	case SIOCBRDGDADDR:
2160 	case SIOCBRDGFLUSH:
2161 	case SIOCBRDGADDL:
2162 	case SIOCBRDGSIFPROT:
2163 	case SIOCBRDGARL:
2164 	case SIOCBRDGFRL:
2165 	case SIOCBRDGSPRI:
2166 	case SIOCBRDGSHT:
2167 	case SIOCBRDGSFD:
2168 	case SIOCBRDGSMA:
2169 	case SIOCBRDGSIFPRIO:
2170 	case SIOCBRDGSIFCOST:
2171 	case SIOCBRDGSTXHC:
2172 	case SIOCBRDGSPROTO:
2173 	case SIOCSWGDPID:
2174 	case SIOCSWSPORTNO:
2175 	case SIOCSWGMAXFLOW:
2176 #endif
2177 		if ((error = suser(p)) != 0)
2178 			break;
2179 		/* FALLTHROUGH */
2180 	default:
2181 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2182 			(struct mbuf *) cmd, (struct mbuf *) data,
2183 			(struct mbuf *) ifp, p));
2184 		if (error != EOPNOTSUPP)
2185 			break;
2186 		switch (cmd) {
2187 		case SIOCAIFADDR:
2188 		case SIOCDIFADDR:
2189 		case SIOCSIFADDR:
2190 		case SIOCSIFNETMASK:
2191 		case SIOCSIFDSTADDR:
2192 		case SIOCSIFBRDADDR:
2193 #ifdef INET6
2194 		case SIOCAIFADDR_IN6:
2195 		case SIOCDIFADDR_IN6:
2196 #endif
2197 			error = suser(p);
2198 			break;
2199 		default:
2200 			error = 0;
2201 			break;
2202 		}
2203 		if (error)
2204 			break;
2205 		NET_LOCK();
2206 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2207 		NET_UNLOCK();
2208 		break;
2209 	}
2210 
2211 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2212 		rtm_ifchg(ifp);
2213 
2214 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2215 		getmicrotime(&ifp->if_lastchange);
2216 
2217 	return (error);
2218 }
2219 
/*
 * Handle the read-only subset of interface ioctls, taking the net
 * lock in read mode only.  Global queries are served first; the rest
 * copy a field of the named interface into the caller's ifreq.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* queries that do not name a specific interface */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* internal-only bits are masked out */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only forwards the commands handled above */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	return (error);
}
2333 
2334 static int
2335 if_sffpage_check(const caddr_t data)
2336 {
2337 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2338 
2339 	switch (sff->sff_addr) {
2340 	case IFSFF_ADDR_EEPROM:
2341 	case IFSFF_ADDR_DDM:
2342 		break;
2343 	default:
2344 		return (EINVAL);
2345 	}
2346 
2347 	return (0);
2348 }
2349 
2350 int
2351 if_txhprio_l2_check(int hdrprio)
2352 {
2353 	switch (hdrprio) {
2354 	case IF_HDRPRIO_PACKET:
2355 		return (0);
2356 	default:
2357 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2358 			return (0);
2359 		break;
2360 	}
2361 
2362 	return (EINVAL);
2363 }
2364 
2365 int
2366 if_txhprio_l3_check(int hdrprio)
2367 {
2368 	switch (hdrprio) {
2369 	case IF_HDRPRIO_PACKET:
2370 	case IF_HDRPRIO_PAYLOAD:
2371 		return (0);
2372 	default:
2373 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2374 			return (0);
2375 		break;
2376 	}
2377 
2378 	return (EINVAL);
2379 }
2380 
2381 int
2382 if_rxhprio_l2_check(int hdrprio)
2383 {
2384 	switch (hdrprio) {
2385 	case IF_HDRPRIO_PACKET:
2386 	case IF_HDRPRIO_OUTER:
2387 		return (0);
2388 	default:
2389 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2390 			return (0);
2391 		break;
2392 	}
2393 
2394 	return (EINVAL);
2395 }
2396 
2397 int
2398 if_rxhprio_l3_check(int hdrprio)
2399 {
2400 	switch (hdrprio) {
2401 	case IF_HDRPRIO_PACKET:
2402 	case IF_HDRPRIO_PAYLOAD:
2403 	case IF_HDRPRIO_OUTER:
2404 		return (0);
2405 	default:
2406 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2407 			return (0);
2408 		break;
2409 	}
2410 
2411 	return (EINVAL);
2412 }
2413 
2414 /*
2415  * Return interface configuration
2416  * of system.  List may be used
2417  * in later ioctl's (above) to get
2418  * other information.
2419  */
2420 int
2421 ifconf(caddr_t data)
2422 {
2423 	struct ifconf *ifc = (struct ifconf *)data;
2424 	struct ifnet *ifp;
2425 	struct ifaddr *ifa;
2426 	struct ifreq ifr, *ifrp;
2427 	int space = ifc->ifc_len, error = 0;
2428 
2429 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2430 	if (space == 0) {
2431 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2432 			struct sockaddr *sa;
2433 
2434 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2435 				space += sizeof (ifr);
2436 			else
2437 				TAILQ_FOREACH(ifa,
2438 				    &ifp->if_addrlist, ifa_list) {
2439 					sa = ifa->ifa_addr;
2440 					if (sa->sa_len > sizeof(*sa))
2441 						space += sa->sa_len -
2442 						    sizeof(*sa);
2443 					space += sizeof(ifr);
2444 				}
2445 		}
2446 		ifc->ifc_len = space;
2447 		return (0);
2448 	}
2449 
2450 	ifrp = ifc->ifc_req;
2451 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2452 		if (space < sizeof(ifr))
2453 			break;
2454 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2455 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2456 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2457 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2458 			    sizeof(ifr));
2459 			if (error)
2460 				break;
2461 			space -= sizeof (ifr), ifrp++;
2462 		} else
2463 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2464 				struct sockaddr *sa = ifa->ifa_addr;
2465 
2466 				if (space < sizeof(ifr))
2467 					break;
2468 				if (sa->sa_len <= sizeof(*sa)) {
2469 					ifr.ifr_addr = *sa;
2470 					error = copyout((caddr_t)&ifr,
2471 					    (caddr_t)ifrp, sizeof (ifr));
2472 					ifrp++;
2473 				} else {
2474 					space -= sa->sa_len - sizeof(*sa);
2475 					if (space < sizeof (ifr))
2476 						break;
2477 					error = copyout((caddr_t)&ifr,
2478 					    (caddr_t)ifrp,
2479 					    sizeof(ifr.ifr_name));
2480 					if (error == 0)
2481 						error = copyout((caddr_t)sa,
2482 						    (caddr_t)&ifrp->ifr_addr,
2483 						    sa->sa_len);
2484 					ifrp = (struct ifreq *)(sa->sa_len +
2485 					    (caddr_t)&ifrp->ifr_addr);
2486 				}
2487 				if (error)
2488 					break;
2489 				space -= sizeof (ifr);
2490 			}
2491 	}
2492 	ifc->ifc_len -= space;
2493 	return (error);
2494 }
2495 
/*
 * Attach the per-CPU packet counters to an interface.  Must only be
 * called once per ifp, before any counter is bumped.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2503 
/*
 * Release the per-CPU packet counters allocated by if_counters_alloc()
 * and clear the pointer so stale reads are caught.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2512 
/*
 * Snapshot an interface's statistics into *data: start from the
 * driver-maintained if_data, then fold in the per-CPU counters (if
 * allocated) and the per-queue send/receive statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		/* Sum the per-CPU counters into a flat array. */
		counters_read(ifp->if_counters, counters, nitems(counters));

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Add per-transmit-queue statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Add per-receive-queue statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2551 
2552 /*
2553  * Dummy functions replaced in ifnet during detach (if protocols decide to
2554  * fiddle with the if during detach.
2555  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* No hardware left to service the queue; drop everything. */
	ifq_purge(ifq);
}
2561 
2562 int
2563 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2564 {
2565 	return ENODEV;
2566 }
2567 
2568 /*
2569  * Create interface group without members
2570  */
2571 struct ifg_group *
2572 if_creategroup(const char *groupname)
2573 {
2574 	struct ifg_group	*ifg;
2575 
2576 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2577 		return (NULL);
2578 
2579 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2580 	ifg->ifg_refcnt = 0;
2581 	ifg->ifg_carp_demoted = 0;
2582 	TAILQ_INIT(&ifg->ifg_members);
2583 #if NPF > 0
2584 	pfi_attach_ifgroup(ifg);
2585 #endif
2586 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2587 
2588 	return (ifg);
2589 }
2590 
2591 /*
2592  * Add a group to an interface
2593  */
2594 int
2595 if_addgroup(struct ifnet *ifp, const char *groupname)
2596 {
2597 	struct ifg_list		*ifgl;
2598 	struct ifg_group	*ifg = NULL;
2599 	struct ifg_member	*ifgm;
2600 
2601 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2602 	    groupname[strlen(groupname) - 1] <= '9')
2603 		return (EINVAL);
2604 
2605 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2606 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2607 			return (EEXIST);
2608 
2609 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2610 		return (ENOMEM);
2611 
2612 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2613 		free(ifgl, M_TEMP, sizeof(*ifgl));
2614 		return (ENOMEM);
2615 	}
2616 
2617 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2618 		if (!strcmp(ifg->ifg_group, groupname))
2619 			break;
2620 
2621 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2622 		free(ifgl, M_TEMP, sizeof(*ifgl));
2623 		free(ifgm, M_TEMP, sizeof(*ifgm));
2624 		return (ENOMEM);
2625 	}
2626 
2627 	ifg->ifg_refcnt++;
2628 	ifgl->ifgl_group = ifg;
2629 	ifgm->ifgm_ifp = ifp;
2630 
2631 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2632 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2633 
2634 #if NPF > 0
2635 	pfi_group_addmember(groupname, ifp);
2636 #endif
2637 
2638 	return (0);
2639 }
2640 
2641 /*
2642  * Remove a group from an interface
2643  */
/*
 * Remove interface ifp from the named group.  Drops the group's
 * reference count and destroys the group itself once the last member
 * is gone.  Returns ENOENT if ifp is not a member.
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	/* Locate ifp's membership record for this group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Unlink the matching member entry on the group side too. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_TEMP, sizeof(*ifgm));
	}

#if NPF > 0
	/* Let pf re-evaluate rules referencing this group. */
	pfi_group_change(groupname);
#endif

	/* Last member out destroys the group. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_TEMP, sizeof(*ifgl));

	return (0);
}
2683 
2684 /*
2685  * Stores all groups from an interface in memory pointed
2686  * to by data
2687  */
2688 int
2689 if_getgroup(caddr_t data, struct ifnet *ifp)
2690 {
2691 	int			 len, error;
2692 	struct ifg_list		*ifgl;
2693 	struct ifg_req		 ifgrq, *ifgp;
2694 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2695 
2696 	if (ifgr->ifgr_len == 0) {
2697 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2698 			ifgr->ifgr_len += sizeof(struct ifg_req);
2699 		return (0);
2700 	}
2701 
2702 	len = ifgr->ifgr_len;
2703 	ifgp = ifgr->ifgr_groups;
2704 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2705 		if (len < sizeof(ifgrq))
2706 			return (EINVAL);
2707 		bzero(&ifgrq, sizeof ifgrq);
2708 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2709 		    sizeof(ifgrq.ifgrq_group));
2710 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2711 		    sizeof(struct ifg_req))))
2712 			return (error);
2713 		len -= sizeof(ifgrq);
2714 		ifgp++;
2715 	}
2716 
2717 	return (0);
2718 }
2719 
2720 /*
2721  * Stores all members of a group in memory pointed to by data
2722  */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Find the named group. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Size-probe mode: report the room needed for all members. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per member interface into the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2761 
2762 int
2763 if_getgroupattribs(caddr_t data)
2764 {
2765 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2766 	struct ifg_group	*ifg;
2767 
2768 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2769 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2770 			break;
2771 	if (ifg == NULL)
2772 		return (ENOENT);
2773 
2774 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2775 
2776 	return (0);
2777 }
2778 
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	/* Find the named group. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/*
	 * The request carries a signed delta; reject it if applying it
	 * would push the accumulated demotion counter outside [0, 255].
	 */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Notify every member interface of the attribute change. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2805 
2806 /*
2807  * Stores all groups in memory pointed to by data
2808  */
2809 int
2810 if_getgrouplist(caddr_t data)
2811 {
2812 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2813 	struct ifg_group	*ifg;
2814 	struct ifg_req		 ifgrq, *ifgp;
2815 	int			 len, error;
2816 
2817 	if (ifgr->ifgr_len == 0) {
2818 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2819 			ifgr->ifgr_len += sizeof(ifgrq);
2820 		return (0);
2821 	}
2822 
2823 	len = ifgr->ifgr_len;
2824 	ifgp = ifgr->ifgr_groups;
2825 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2826 		if (len < sizeof(ifgrq))
2827 			return (EINVAL);
2828 		bzero(&ifgrq, sizeof ifgrq);
2829 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2830 		    sizeof(ifgrq.ifgrq_group));
2831 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2832 		    sizeof(struct ifg_req))))
2833 			return (error);
2834 		len -= sizeof(ifgrq);
2835 		ifgp++;
2836 	}
2837 
2838 	return (0);
2839 }
2840 
/*
 * Called on route changes: if the affected destination looks like a
 * default route (all-zero address with an all-zero or empty mask),
 * rebuild the "egress" interface group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2862 
/*
 * Rebuild the "egress" group from scratch: empty it, then re-add every
 * interface that carries a default route in routing table 0, for both
 * IPv4 and (if configured) IPv6.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Strip all current members from the egress group, if it exists. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes and re-add their interfaces. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2912 
2913 /*
2914  * Set/clear promiscuous mode on interface ifp based on the truth value
2915  * of pswitch.  The calls are reference counted so that only the first
2916  * "on" request actually has an effect, as does the final "off" request.
2917  * Results are undefined if the "off" and "on" requests are not matched.
2918  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* Save state so we can roll back if the driver ioctl fails. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the first "on" request changes the flag. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the last "off" request clears the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* Flag bookkeeping suffices while the interface is down. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	/* Push the new flags down to the driver. */
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
2953 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2959 
/* Unlink an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2965 
/*
 * Overwrite ifa's broadcast address with *sa.  The stored sockaddr is
 * replaced in place, so both must have the same length.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2973 
#ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every address of every interface, one per line. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char buf[INET6_ADDRSTRLEN];
			struct sockaddr *sa = ifa->ifa_addr;

			switch (sa->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(sa)->sin_addr,
				    buf, sizeof(buf)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &satosin6(sa)->sin6_addr,
				    buf, sizeof(buf)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3005 
/*
 * The interface's link-layer address has changed: bounce IFF_UP through
 * the driver so it reprograms the hardware with the new address, and
 * regenerate the IPv6 link-local address derived from it.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring it (back) up so the driver picks up the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3052 
/* Register a task to run when the interface's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3060 
/* Unregister a previously added address-change task. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3068 
/* Fire all registered address-change hooks for this interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3074 
3075 void
3076 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3077 {
3078 	extern int ticks;
3079 
3080 	memset(rxr, 0, sizeof(*rxr));
3081 
3082 	rxr->rxr_adjusted = ticks;
3083 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3084 	rxr->rxr_hwm = hwm;
3085 }
3086 
3087 static inline void
3088 if_rxr_adjust_cwm(struct if_rxring *rxr)
3089 {
3090 	extern int ticks;
3091 
3092 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3093 		return;
3094 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3095 		rxr->rxr_cwm++;
3096 
3097 	rxr->rxr_adjusted = ticks;
3098 }
3099 
3100 void
3101 if_rxr_livelocked(struct if_rxring *rxr)
3102 {
3103 	extern int ticks;
3104 
3105 	if (ticks - rxr->rxr_adjusted >= 1) {
3106 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3107 			rxr->rxr_cwm--;
3108 
3109 		rxr->rxr_adjusted = ticks;
3110 	}
3111 }
3112 
3113 u_int
3114 if_rxr_get(struct if_rxring *rxr, u_int max)
3115 {
3116 	extern int ticks;
3117 	u_int diff;
3118 
3119 	if (ticks - rxr->rxr_adjusted >= 1) {
3120 		/* we're free to try for an adjustment */
3121 		if_rxr_adjust_cwm(rxr);
3122 	}
3123 
3124 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3125 		return (0);
3126 
3127 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3128 	rxr->rxr_alive += diff;
3129 
3130 	return (diff);
3131 }
3132 
/*
 * Copy up to t receive-ring info entries out to the userland
 * if_rxrinfo structure, clamped by the space the caller provided, and
 * report the total number of entries available.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Copy only as many entries as the caller has room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	/* Write back the header so the caller learns the true total. */
	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3155 
3156 int
3157 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3158     struct if_rxring *rxr)
3159 {
3160 	struct if_rxring_info ifr;
3161 
3162 	memset(&ifr, 0, sizeof(ifr));
3163 
3164 	if (name != NULL)
3165 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3166 
3167 	ifr.ifr_size = size;
3168 	ifr.ifr_info = *rxr;
3169 
3170 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3171 }
3172 
3173 /*
3174  * Network stack input queues.
3175  */
3176 
/*
 * Initialise a network stack input queue: a bounded mbuf queue at
 * IPL_NET paired with the softnet isr to schedule on arrival.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3183 
3184 int
3185 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3186 {
3187 	int rv;
3188 
3189 	rv = mq_enqueue(&niq->ni_q, m);
3190 	if (rv == 0)
3191 		schednetisr(niq->ni_isr);
3192 	else
3193 		if_congestion();
3194 
3195 	return (rv);
3196 }
3197 
3198 int
3199 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3200 {
3201 	int rv;
3202 
3203 	rv = mq_enlist(&niq->ni_q, ml);
3204 	if (rv == 0)
3205 		schednetisr(niq->ni_isr);
3206 	else
3207 		if_congestion();
3208 
3209 	return (rv);
3210 }
3211 
/* Fatal: called when a switch over address families missed a case. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3217 
3218 /*
3219  * XXXSMP This tunable is here to work around the fact that IPsec
3220  * globals aren't ready to be accessed by multiple threads in
3221  * parallel.
3222  */
3223 int		 nettaskqs = NET_TASKQ;
3224 
3225 struct taskq *
3226 net_tq(unsigned int ifindex)
3227 {
3228 	struct taskq *t = NULL;
3229 
3230 	t = nettqmp[ifindex % nettaskqs];
3231 
3232 	return (t);
3233 }
3234