xref: /openbsd/sys/net/if.c (revision 4cfece93)
1 /*	$OpenBSD: if.c,v 1.612 2020/07/10 13:23:34 patrick Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "if_wg.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 #include <sys/stdint.h>	/* uintptr_t */
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 int	if_createrdomain(int, struct ifnet *);
138 int	if_setrdomain(struct ifnet *, int);
139 void	if_slowtimo(void *);
140 
141 void	if_detached_qstart(struct ifqueue *);
142 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
143 
144 int	ifioctl_get(u_long, caddr_t);
145 int	ifconf(caddr_t);
146 static int
147 	if_sffpage_check(const caddr_t);
148 
149 int	if_getgroup(caddr_t, struct ifnet *);
150 int	if_getgroupmembers(caddr_t);
151 int	if_getgroupattribs(caddr_t);
152 int	if_setgroupattribs(caddr_t);
153 int	if_getgrouplist(caddr_t);
154 
155 void	if_linkstate(struct ifnet *);
156 void	if_linkstate_task(void *);
157 
158 int	if_clone_list(struct if_clonereq *);
159 struct if_clone	*if_clone_lookup(const char *, int *);
160 
161 int	if_group_egress_build(void);
162 
163 void	if_watchdog_task(void *);
164 
165 void	if_netisr(void *);
166 
167 #ifdef DDB
168 void	ifa_print_all(void);
169 #endif
170 
171 void	if_qstart_compat(struct ifqueue *);
172 
173 /*
174  * interface index map
175  *
176  * the kernel maintains a mapping of interface indexes to struct ifnet
177  * pointers.
178  *
179  * the map is an array of struct ifnet pointers prefixed by an if_map
180  * structure. the if_map structure stores the length of its array.
181  *
182  * as interfaces are attached to the system, the map is grown on demand
183  * up to USHRT_MAX entries.
184  *
185  * interface index 0 is reserved and represents no interface. this
186  * supports the use of the interface index as the scope for IPv6 link
187  * local addresses, where scope 0 means no scope has been specified.
188  * it also supports the use of interface index as the unique identifier
189  * for network interfaces in SNMP applications as per RFC2863. therefore
190  * if_get(0) returns NULL.
191  */
192 
193 void if_ifp_dtor(void *, void *);
194 void if_map_dtor(void *, void *);
195 struct ifnet *if_ref(struct ifnet *);
196 
197 /*
198  * struct if_map
199  *
200  * bounded array of ifnet srp pointers used to fetch references of live
201  * interfaces with if_get().
202  */
203 
204 struct if_map {
205 	unsigned long		 limit;
206 	/* followed by limit ifnet srp pointers */
207 };
208 
209 /*
210  * struct if_idxmap
211  *
212  * infrastructure to manage updates and accesses to the current if_map.
213  */
214 
215 struct if_idxmap {
216 	unsigned int		 serial;
217 	unsigned int		 count;
218 	struct srp		 map;
219 };
220 
221 void	if_idxmap_init(unsigned int);
222 void	if_idxmap_insert(struct ifnet *);
223 void	if_idxmap_remove(struct ifnet *);
224 
/* list of all interface groups in the system */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners and their count */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

/* global congestion indicator for the ifq layer */
int	ifq_congestion;

/* bitmask of pending legacy netisr protocols, dispatched by if_netisr() */
int		 netisr;

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
248 
249 /*
250  * Network interface utility routines.
251  */
/*
 * Initialise the interface index map and create the softnet task
 * queue(s).  Called once during boot, before interfaces attach.
 */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8);

	for (i = 0; i < NET_TASKQ; i++) {
		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
		if (nettqmp[i] == NULL)
			panic("unable to create network taskq %d", i);
	}
}
269 
/* the global interface index map, populated by if_idxmap_init() */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* srp garbage collectors for ifnet references and retired index maps */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all network interfaces in the system */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
280 
/*
 * Allocate and install the initial interface index map with room for
 * ``limit'' entries.  The serial counter starts at 1 because index 0
 * is reserved: if_get(0) must return NULL.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	/* the srp array immediately follows the if_map header */
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
301 
/*
 * Allocate an interface index for ``ifp'' and publish the interface
 * in the index map, growing the map (by doubling) when the candidate
 * index falls outside the current array.  Takes the map's reference
 * on the interface via if_ref().
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes are constrained to 16 bits by masking with USHRT_MAX */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* double the map and copy live entries, taking new refs */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* publish; the old map is reclaimed through if_map_dtor() */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
365 
/*
 * Remove ``ifp'' from the interface index map and sleep until every
 * outstanding reference on the interface has been released.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot drops the map's reference via if_ifp_dtor() */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
391 
/* srp_gc callback: release a reference on an interface. */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
397 
/*
 * srp_gc callback: tear down a retired index map, dropping any
 * interface references it still holds, then free it.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
414 
/*
 * Attach an interface to the
 * list of "active" interfaces: join the "all" group, attach the
 * per-domain data, start the slow timeout, allocate an interface
 * index, and announce the arrival on the routing socket.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/* the tasks carry the index, not the pointer, so they can if_get() */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
449 
/*
 * Allocate the link level name for the specified interface.  This
 * is an attachment helper.  It must be called after ifp->if_addrlen
 * is initialized, which may not be the case when if_attach() is
 * called.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to a multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	/* interface name followed by the link-level address */
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
487 
488 /*
489  * Free the link level name for the specified interface.  This is
490  * a detach helper.  This is called from if_detach() or from
491  * link layer type specific detach functions.
492  */
493 void
494 if_free_sadl(struct ifnet *ifp)
495 {
496 	if (ifp->if_sadl == NULL)
497 		return;
498 
499 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
500 	ifp->if_sadl = NULL;
501 }
502 
/*
 * Initialise the per-address-family data of ``ifp'' by invoking each
 * domain's dom_ifattach hook.
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
521 
/* Like if_attach(), but inserts the interface at the head of the list. */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
531 
/* Attach ``ifp'' to the system, appending it to the interface list. */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
541 
542 void
543 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
544 {
545 	struct ifqueue **map;
546 	struct ifqueue *ifq;
547 	int i;
548 
549 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
550 	KASSERT(nqs != 0);
551 
552 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
553 
554 	ifp->if_snd.ifq_softc = NULL;
555 	map[0] = &ifp->if_snd;
556 
557 	for (i = 1; i < nqs; i++) {
558 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
559 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
560 		ifq_init(ifq, ifp, i);
561 		map[i] = ifq;
562 	}
563 
564 	ifp->if_ifqs = map;
565 	ifp->if_nifqs = nqs;
566 }
567 
/*
 * Give ``ifp'' a total of ``niqs'' receive (input) queues.  The
 * builtin if_rcv queue becomes slot 0 of the new queue map; the
 * remaining slots are freshly allocated ifiqueues.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
591 
/*
 * Driver-independent initialisation shared by if_attach() and
 * if_attachhead(): address lists, send/receive queues, hooks, and
 * default function pointers.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/*
	 * MPSAFE drivers provide if_qstart; legacy drivers provide
	 * if_start and get the if_qstart_compat shim installed.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* start with a single transmit queue: the builtin if_snd */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* start with a single receive queue: the builtin if_rcv */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
637 
/*
 * Switch the queueing discipline of ``ifp'' by attaching new ifq_ops
 * to its first transmit queue.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
652 
/* Kick the single transmit queue of a legacy (non-MPSAFE) driver. */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy drivers expect the kernel lock and splnet protection */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
680 
/*
 * Queue ``m'' for transmission on ``ifp''.  Packets may be diverted
 * to pf's delay queue or to a bridge before reaching the interface's
 * own enqueue routine.  Returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#if NPF > 0
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets already processed by the bridge */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
704 
/*
 * Default if_enqueue implementation: pick one of the interface's
 * transmit queues, enqueue the mbuf, and start transmission.
 * Returns 0 on success or the errno from ifq_enqueue().
 */
int
if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
{
	struct ifqueue *ifq = &ifp->if_snd;
	int error;

	if (ifp->if_nifqs > 1) {
		unsigned int idx;

		/*
		 * use the operations on the first ifq to pick which of
		 * the array gets this mbuf.
		 */

		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
		ifq = ifp->if_ifqs[idx];
	}

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}
731 
732 void
733 if_input(struct ifnet *ifp, struct mbuf_list *ml)
734 {
735 	ifiq_input(&ifp->if_rcv, ml);
736 }
737 
/*
 * Loop a packet of address family ``af'' back into the local input
 * path of ``ifp''.  Both input and output statistics are bumped since
 * the packet logically leaves and re-enters the interface.  Returns 0
 * or EAFNOSUPPORT for unhandled families.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
789 
/*
 * Feed an outgoing packet back into one of the receive queues of
 * ``ifp'', chosen by the mbuf's flow id.  Returns 0 on success or
 * ENOBUFS if the input queue rejected the packet.
 */
int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* spread packets over the input queues using the flow id */
	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}
807 
/*
 * struct ifih
 *
 * protocol input handler hooked onto an interface's if_inputs list.
 * ifih_refcnt counts duplicate registrations of the same
 * (input, cookie) pair; ifih_srpcnt tracks concurrent SRP readers.
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	void			 *ifih_cookie;
	int			  ifih_refcnt;
	struct refcnt		  ifih_srpcnt;
};

void	if_ih_ref(void *, void *);
void	if_ih_unref(void *, void *);

/* ref/unref callbacks used by the SRP list of input handlers */
struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
821 
/*
 * Register the protocol input handler (input, cookie) on ``ifp''.
 * Registering the same pair again only bumps its reference count.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
849 
/* SRP list callback: take a reader reference on an input handler. */
void
if_ih_ref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_take(&ifih->ifih_srpcnt);
}
857 
/* SRP list callback: drop a reader reference on an input handler. */
void
if_ih_unref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_rele_wake(&ifih->ifih_srpcnt);
}
865 
/*
 * Unregister the protocol input handler (input, cookie) from ``ifp''.
 * The handler is freed once its registration count drops to zero and
 * all concurrent SRP readers have finished with it.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* wait for readers still traversing the list */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
890 
/*
 * Offer ``m'' to each registered input handler of ``ifp'' in turn.
 * A handler returning non-zero consumes the packet; if none does,
 * the packet is dropped here.
 */
static void
if_ih_input(struct ifnet *ifp, struct mbuf *m)
{
	struct ifih *ifih;
	struct srp_ref sr;

	/*
	 * Pass this mbuf to all input handlers of its
	 * interface until it is consumed.
	 */
	SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
		if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
			break;
	}
	SRPL_LEAVE(&sr);

	if (ifih == NULL)
		m_freem(m);
}
910 
/*
 * Process a list of received packets for ``ifp'' under the NET_LOCK,
 * feeding each one through the interface's input handlers.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces carry no fresh external entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		if_ih_input(ifp, m);
	NET_UNLOCK();
}
939 
/*
 * Input path for virtual interfaces: tag the mbuf with the receiving
 * interface, account it, feed bpf, and run the input handlers
 * directly (no input queue).
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means bpf filtered the packet out */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	if_ih_input(ifp, m);
}
965 
/*
 * Softnet task that dispatches pending legacy netisr protocol
 * handlers according to the bits set in the global ``netisr'' mask.
 * pfsync is handled once at the end, after all other work.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced so pfsync runs last, once */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
1035 
/*
 * Run every task on a hook list.  The mutex is dropped around each
 * callback; a cursor task (t_func == NULL) is inserted after the
 * current entry so iteration can resume safely even if the list is
 * modified while the hook runs.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		/* run the hook without holding the mutex */
		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
1064 
/* Run the detach hooks of ``ifp'' to undo pseudo-driver changes. */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1078 
/* Register a detach hook; inserted at the head for LIFO execution. */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1086 
/* Unregister a previously added detach hook. */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1094 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* point the interface at stub handlers for late callers */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* tear down per-domain data set up by if_attachdomain() */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* destroy the transmit queues; slot 0 is the builtin if_snd */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* destroy the receive queues; slot 0 is the builtin if_rcv */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1208 
1209 /*
1210  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1211  */
1212 int
1213 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1214 {
1215 	struct ifnet *ifp;
1216 	int connected = 0;
1217 
1218 	ifp = if_get(ifidx);
1219 	if (ifp == NULL)
1220 		return (0);
1221 
1222 	if (ifp0->if_index == ifp->if_index)
1223 		connected = 1;
1224 
1225 #if NBRIDGE > 0
1226 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1227 		connected = 1;
1228 #endif
1229 #if NCARP > 0
1230 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1231 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1232 		connected = 1;
1233 #endif
1234 
1235 	if_put(ifp);
1236 	return (connected);
1237 }
1238 
1239 /*
1240  * Create a clone network interface.
1241  */
1242 int
1243 if_clone_create(const char *name, int rdomain)
1244 {
1245 	struct if_clone *ifc;
1246 	struct ifnet *ifp;
1247 	int unit, ret;
1248 
1249 	ifc = if_clone_lookup(name, &unit);
1250 	if (ifc == NULL)
1251 		return (EINVAL);
1252 
1253 	if (ifunit(name) != NULL)
1254 		return (EEXIST);
1255 
1256 	ret = (*ifc->ifc_create)(ifc, unit);
1257 
1258 	if (ret != 0 || (ifp = ifunit(name)) == NULL)
1259 		return (ret);
1260 
1261 	NET_LOCK();
1262 	if_addgroup(ifp, ifc->ifc_name);
1263 	if (rdomain != 0)
1264 		if_setrdomain(ifp, rdomain);
1265 	NET_UNLOCK();
1266 
1267 	return (ret);
1268 }
1269 
1270 /*
1271  * Destroy a clone network interface.
1272  */
1273 int
1274 if_clone_destroy(const char *name)
1275 {
1276 	struct if_clone *ifc;
1277 	struct ifnet *ifp;
1278 	int ret;
1279 
1280 	ifc = if_clone_lookup(name, NULL);
1281 	if (ifc == NULL)
1282 		return (EINVAL);
1283 
1284 	ifp = ifunit(name);
1285 	if (ifp == NULL)
1286 		return (ENXIO);
1287 
1288 	if (ifc->ifc_destroy == NULL)
1289 		return (EOPNOTSUPP);
1290 
1291 	NET_LOCK();
1292 	if (ifp->if_flags & IFF_UP) {
1293 		int s;
1294 		s = splnet();
1295 		if_down(ifp);
1296 		splx(s);
1297 	}
1298 	NET_UNLOCK();
1299 	ret = (*ifc->ifc_destroy)(ifp);
1300 
1301 	return (ret);
1302 }
1303 
1304 /*
1305  * Look up a network interface cloner.
1306  */
1307 struct if_clone *
1308 if_clone_lookup(const char *name, int *unitp)
1309 {
1310 	struct if_clone *ifc;
1311 	const char *cp;
1312 	int unit;
1313 
1314 	/* separate interface name from unit */
1315 	for (cp = name;
1316 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1317 	    cp++)
1318 		continue;
1319 
1320 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1321 		return (NULL);	/* No name or unit number */
1322 
1323 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1324 		return (NULL);	/* unit number 0 padded */
1325 
1326 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1327 		if (strlen(ifc->ifc_name) == cp - name &&
1328 		    !strncmp(name, ifc->ifc_name, cp - name))
1329 			break;
1330 	}
1331 
1332 	if (ifc == NULL)
1333 		return (NULL);
1334 
1335 	unit = 0;
1336 	while (cp - name < IFNAMSIZ && *cp) {
1337 		if (*cp < '0' || *cp > '9' ||
1338 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1339 			/* Bogus unit number. */
1340 			return (NULL);
1341 		}
1342 		unit = (unit * 10) + (*cp++ - '0');
1343 	}
1344 
1345 	if (unitp != NULL)
1346 		*unitp = unit;
1347 	return (ifc);
1348 }
1349 
1350 /*
1351  * Register a network interface cloner.
1352  */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	/* Kept in sync with the list length; reported by if_clone_list(). */
	if_cloners_count++;
}
1366 
1367 /*
1368  * Provide list of interface cloners to userspace.
1369  */
1370 int
1371 if_clone_list(struct if_clonereq *ifcr)
1372 {
1373 	char outbuf[IFNAMSIZ], *dst;
1374 	struct if_clone *ifc;
1375 	int count, error = 0;
1376 
1377 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1378 		/* Just asking how many there are. */
1379 		ifcr->ifcr_total = if_cloners_count;
1380 		return (0);
1381 	}
1382 
1383 	if (ifcr->ifcr_count < 0)
1384 		return (EINVAL);
1385 
1386 	ifcr->ifcr_total = if_cloners_count;
1387 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1388 
1389 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1390 		if (count == 0)
1391 			break;
1392 		bzero(outbuf, sizeof outbuf);
1393 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1394 		error = copyout(outbuf, dst, IFNAMSIZ);
1395 		if (error)
1396 			break;
1397 		count--;
1398 		dst += IFNAMSIZ;
1399 	}
1400 
1401 	return (error);
1402 }
1403 
1404 /*
1405  * set queue congestion marker
1406  */
void
if_congestion(void)
{
	extern int ticks;

	/* Timestamp the event; if_congested() treats recent marks as live. */
	ifq_congestion = ticks;
}
1414 
1415 int
1416 if_congested(void)
1417 {
1418 	extern int ticks;
1419 	int diff;
1420 
1421 	diff = ticks - ifq_congestion;
1422 	if (diff < 0) {
1423 		ifq_congestion = ticks - hz;
1424 		return (0);
1425 	}
1426 
1427 	return (diff <= (hz / 100));
1428 }
1429 
/*
 * Byte-for-byte sockaddr comparison using a1's sa_len as the length.
 * NOTE(review): a1 is evaluated twice -- only pass side-effect-free
 * expressions.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1433 
1434 /*
1435  * Locate an interface based on a complete address.
1436  */
1437 struct ifaddr *
1438 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1439 {
1440 	struct ifnet *ifp;
1441 	struct ifaddr *ifa;
1442 	u_int rdomain;
1443 
1444 	rdomain = rtable_l2(rtableid);
1445 	KERNEL_LOCK();
1446 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1447 		if (ifp->if_rdomain != rdomain)
1448 			continue;
1449 
1450 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1451 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1452 				continue;
1453 
1454 			if (equal(addr, ifa->ifa_addr)) {
1455 				KERNEL_UNLOCK();
1456 				return (ifa);
1457 			}
1458 		}
1459 	}
1460 	KERNEL_UNLOCK();
1461 	return (NULL);
1462 }
1463 
1464 /*
1465  * Locate the point to point interface with a given destination address.
1466  */
1467 struct ifaddr *
1468 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1469 {
1470 	struct ifnet *ifp;
1471 	struct ifaddr *ifa;
1472 
1473 	rdomain = rtable_l2(rdomain);
1474 	KERNEL_LOCK();
1475 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1476 		if (ifp->if_rdomain != rdomain)
1477 			continue;
1478 		if (ifp->if_flags & IFF_POINTOPOINT) {
1479 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1480 				if (ifa->ifa_addr->sa_family !=
1481 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1482 					continue;
1483 				if (equal(addr, ifa->ifa_dstaddr)) {
1484 					KERNEL_UNLOCK();
1485 					return (ifa);
1486 				}
1487 			}
1488 		}
1489 	}
1490 	KERNEL_UNLOCK();
1491 	return (NULL);
1492 }
1493 
1494 /*
1495  * Find an interface address specific to an interface best matching
1496  * a given address.
1497  */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* first family match, fallback */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first address of the right family. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			/*
			 * No netmask to apply: require an exact match on the
			 * address or on the p2p destination address.
			 */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Compare addr and ifa_addr byte by byte under the netmask;
		 * the loop breaks early on the first masked mismatch, so
		 * cp3 == cplim below means every masked byte agreed.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1532 
/* No-op if_rtrequest handler for interfaces needing no per-route work. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1537 
1538 /*
1539  * Default action when installing a local route on a point-to-point
1540  * interface.
1541  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get special treatment here. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the route was installed for. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Check whether the routing domain's loopback interface has
		 * an address of the same family configured.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		/* Local routes are not link-level (ARP/ND-style) entries. */
		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1584 
1585 
1586 /*
1587  * Bring down all interfaces
1588  */
1589 void
1590 if_downall(void)
1591 {
1592 	struct ifreq ifrq;	/* XXX only partly built */
1593 	struct ifnet *ifp;
1594 
1595 	NET_LOCK();
1596 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1597 		if ((ifp->if_flags & IFF_UP) == 0)
1598 			continue;
1599 		if_down(ifp);
1600 		ifrq.ifr_flags = ifp->if_flags;
1601 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1602 	}
1603 	NET_UNLOCK();
1604 }
1605 
1606 /*
1607  * Mark an interface down and notify protocols of
1608  * the transition.
1609  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop any packets still queued for transmission. */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1621 
1622 /*
1623  * Mark an interface up and notify protocols of
1624  * the transition.
1625  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	/* Notify userland, the routing table and hooks of the transition. */
	if_linkstate(ifp);
}
1642 
1643 /*
1644  * Notify userland, the routing table and hooks owner of
1645  * a link-state transition.
1646  */
void
if_linkstate_task(void *xifidx)
{
	/* The interface index is smuggled through the task's void * arg. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been detached since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1664 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Notify routing-socket listeners, then update routes over ifp. */
	rtm_ifchg(ifp);
	rt_if_track(ifp);

	/* Run the tasks registered via if_linkstatehook_add(). */
	if_hooks_run(&ifp->if_linkstatehooks);
}
1675 
/* Register a task to run on every link-state change of `ifp'. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1683 
/* Unregister a task added with if_linkstatehook_add(). */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1691 
1692 /*
1693  * Schedule a link state change task.
1694  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the work to the softnet taskq; see if_linkstate_task(). */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1700 
1701 /*
1702  * Handle interface watchdog timer routine.  Called
1703  * from softclock, we decrement timer (if set) and
1704  * call the appropriate interface routine on expiration.
1705  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* A timer that just reached zero fires the watchdog task. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm ourselves as long as a watchdog is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1719 
1720 void
1721 if_watchdog_task(void *xifidx)
1722 {
1723 	unsigned int ifidx = (unsigned long)xifidx;
1724 	struct ifnet *ifp;
1725 	int s;
1726 
1727 	ifp = if_get(ifidx);
1728 	if (ifp == NULL)
1729 		return;
1730 
1731 	KERNEL_LOCK();
1732 	s = splnet();
1733 	if (ifp->if_watchdog)
1734 		(*ifp->if_watchdog)(ifp);
1735 	splx(s);
1736 	KERNEL_UNLOCK();
1737 
1738 	if_put(ifp);
1739 }
1740 
1741 /*
1742  * Map interface name to interface structure pointer.
1743  */
1744 struct ifnet *
1745 ifunit(const char *name)
1746 {
1747 	struct ifnet *ifp;
1748 
1749 	KERNEL_ASSERT_LOCKED();
1750 
1751 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1752 		if (strcmp(ifp->if_xname, name) == 0)
1753 			return (ifp);
1754 	}
1755 	return (NULL);
1756 }
1757 
1758 /*
1759  * Map interface index to interface structure pointer.
1760  */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* Read the index map inside an SRP critical section. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The array of per-index srp slots follows the header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take a reference; the caller must if_put() it. */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1783 
/* Take an additional reference on `ifp'; released with if_put(). */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1791 
1792 void
1793 if_put(struct ifnet *ifp)
1794 {
1795 	if (ifp == NULL)
1796 		return;
1797 
1798 	refcnt_rele_wake(&ifp->if_refcnt);
1799 }
1800 
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/*
	 * NOTE(review): assumes `ifp' is embedded in a struct arpcom,
	 * i.e. an Ethernet-like interface; callers must guarantee this.
	 */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1812 
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	/* Create the routing table if needed; it must still be unused. */
	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when `ifp' itself is the rdomain's loopback. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1839 
/* Move `ifp' into routing domain `rdomain', detaching its addresses first. */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* An rdomain's own loopback interface may not leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1903 
1904 /*
1905  * Interface ioctls.
1906  */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Commands that do not name an existing interface, and reads. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		/* All read-only commands are handled separately. */
		return (ifioctl_get(cmd, data));
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Snapshot the flags so changes can be detected (and reported) below. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
		/* Merge the caller's flags, preserving the immutable ones. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);

		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* Driver rejected the change; restore old flags. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Turning MPLS on/off interposes mpls_output on if_output. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			/* Wake-on-LAN requested on hardware lacking it. */
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (!error)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Drop the old label reference before replacing it. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		/* EEXIST just means the rdomain already exists; proceed. */
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Only unicast Ethernet-sized addresses are acceptable. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	/*
	 * Privileged commands that are simply passed down to the driver
	 * (or, via the default case, the protocol) after a suser() check.
	 */
	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
	case SIOCSWGDPID:
	case SIOCSWSPORTNO:
	case SIOCSWGMAXFLOW:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Offer unknown commands to the protocol first. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing commands still require privilege. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	/* Report any flag change to routing-socket listeners. */
	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
		rtm_ifchg(ifp);

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	return (error);
}
2320 
/* Read-only interface ioctls, taking only the shared (read) net lock. */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Commands that are not tied to a single interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE lives in the send queue, not in if_flags. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal-only bits are filtered out of the answer. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	return (error);
}
2434 
2435 static int
2436 if_sffpage_check(const caddr_t data)
2437 {
2438 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2439 
2440 	switch (sff->sff_addr) {
2441 	case IFSFF_ADDR_EEPROM:
2442 	case IFSFF_ADDR_DDM:
2443 		break;
2444 	default:
2445 		return (EINVAL);
2446 	}
2447 
2448 	return (0);
2449 }
2450 
2451 int
2452 if_txhprio_l2_check(int hdrprio)
2453 {
2454 	switch (hdrprio) {
2455 	case IF_HDRPRIO_PACKET:
2456 		return (0);
2457 	default:
2458 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2459 			return (0);
2460 		break;
2461 	}
2462 
2463 	return (EINVAL);
2464 }
2465 
2466 int
2467 if_txhprio_l3_check(int hdrprio)
2468 {
2469 	switch (hdrprio) {
2470 	case IF_HDRPRIO_PACKET:
2471 	case IF_HDRPRIO_PAYLOAD:
2472 		return (0);
2473 	default:
2474 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2475 			return (0);
2476 		break;
2477 	}
2478 
2479 	return (EINVAL);
2480 }
2481 
2482 int
2483 if_rxhprio_l2_check(int hdrprio)
2484 {
2485 	switch (hdrprio) {
2486 	case IF_HDRPRIO_PACKET:
2487 	case IF_HDRPRIO_OUTER:
2488 		return (0);
2489 	default:
2490 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2491 			return (0);
2492 		break;
2493 	}
2494 
2495 	return (EINVAL);
2496 }
2497 
2498 int
2499 if_rxhprio_l3_check(int hdrprio)
2500 {
2501 	switch (hdrprio) {
2502 	case IF_HDRPRIO_PACKET:
2503 	case IF_HDRPRIO_PAYLOAD:
2504 	case IF_HDRPRIO_OUTER:
2505 		return (0);
2506 	default:
2507 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2508 			return (0);
2509 		break;
2510 	}
2511 
2512 	return (EINVAL);
2513 }
2514 
2515 /*
2516  * Return interface configuration
2517  * of system.  List may be used
2518  * in later ioctl's (above) to get
2519  * other information.
2520  */
2521 int
2522 ifconf(caddr_t data)
2523 {
2524 	struct ifconf *ifc = (struct ifconf *)data;
2525 	struct ifnet *ifp;
2526 	struct ifaddr *ifa;
2527 	struct ifreq ifr, *ifrp;
2528 	int space = ifc->ifc_len, error = 0;
2529 
2530 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2531 	if (space == 0) {
2532 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2533 			struct sockaddr *sa;
2534 
2535 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2536 				space += sizeof (ifr);
2537 			else
2538 				TAILQ_FOREACH(ifa,
2539 				    &ifp->if_addrlist, ifa_list) {
2540 					sa = ifa->ifa_addr;
2541 					if (sa->sa_len > sizeof(*sa))
2542 						space += sa->sa_len -
2543 						    sizeof(*sa);
2544 					space += sizeof(ifr);
2545 				}
2546 		}
2547 		ifc->ifc_len = space;
2548 		return (0);
2549 	}
2550 
2551 	ifrp = ifc->ifc_req;
2552 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2553 		if (space < sizeof(ifr))
2554 			break;
2555 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2556 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2557 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2558 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2559 			    sizeof(ifr));
2560 			if (error)
2561 				break;
2562 			space -= sizeof (ifr), ifrp++;
2563 		} else
2564 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2565 				struct sockaddr *sa = ifa->ifa_addr;
2566 
2567 				if (space < sizeof(ifr))
2568 					break;
2569 				if (sa->sa_len <= sizeof(*sa)) {
2570 					ifr.ifr_addr = *sa;
2571 					error = copyout((caddr_t)&ifr,
2572 					    (caddr_t)ifrp, sizeof (ifr));
2573 					ifrp++;
2574 				} else {
2575 					space -= sa->sa_len - sizeof(*sa);
2576 					if (space < sizeof (ifr))
2577 						break;
2578 					error = copyout((caddr_t)&ifr,
2579 					    (caddr_t)ifrp,
2580 					    sizeof(ifr.ifr_name));
2581 					if (error == 0)
2582 						error = copyout((caddr_t)sa,
2583 						    (caddr_t)&ifrp->ifr_addr,
2584 						    sa->sa_len);
2585 					ifrp = (struct ifreq *)(sa->sa_len +
2586 					    (caddr_t)&ifrp->ifr_addr);
2587 				}
2588 				if (error)
2589 					break;
2590 				space -= sizeof (ifr);
2591 			}
2592 	}
2593 	ifc->ifc_len -= space;
2594 	return (error);
2595 }
2596 
/*
 * Allocate the aggregated packet/byte counters for an interface.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);	/* must not allocate twice */

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2604 
/*
 * Release the counters allocated by if_counters_alloc().
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);	/* must have been allocated */

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;	/* mark as gone for if_getdata() */
}
2613 
/*
 * Fill *data with an aggregate view of the interface statistics:
 * the if_data snapshot, plus the allocated counters (if any), plus
 * the per-queue transmit and receive statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		/* Fold the counters_alloc()ed counters into the snapshot. */
		counters_read(ifp->if_counters, counters, nitems(counters));

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Add each output queue's statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Add each input queue's statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2652 
2653 /*
2654  * Dummy functions replaced in ifnet during detach (if protocols decide to
2655  * fiddle with the if during detach.
2656  */
2657 void
2658 if_detached_qstart(struct ifqueue *ifq)
2659 {
2660 	ifq_purge(ifq);
2661 }
2662 
2663 int
2664 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2665 {
2666 	return ENODEV;
2667 }
2668 
2669 /*
2670  * Create interface group without members
2671  */
2672 struct ifg_group *
2673 if_creategroup(const char *groupname)
2674 {
2675 	struct ifg_group	*ifg;
2676 
2677 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2678 		return (NULL);
2679 
2680 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2681 	ifg->ifg_refcnt = 0;
2682 	ifg->ifg_carp_demoted = 0;
2683 	TAILQ_INIT(&ifg->ifg_members);
2684 #if NPF > 0
2685 	pfi_attach_ifgroup(ifg);
2686 #endif
2687 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2688 
2689 	return (ifg);
2690 }
2691 
2692 /*
2693  * Add a group to an interface
2694  */
2695 int
2696 if_addgroup(struct ifnet *ifp, const char *groupname)
2697 {
2698 	struct ifg_list		*ifgl;
2699 	struct ifg_group	*ifg = NULL;
2700 	struct ifg_member	*ifgm;
2701 
2702 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2703 	    groupname[strlen(groupname) - 1] <= '9')
2704 		return (EINVAL);
2705 
2706 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2707 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2708 			return (EEXIST);
2709 
2710 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2711 		return (ENOMEM);
2712 
2713 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2714 		free(ifgl, M_TEMP, sizeof(*ifgl));
2715 		return (ENOMEM);
2716 	}
2717 
2718 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2719 		if (!strcmp(ifg->ifg_group, groupname))
2720 			break;
2721 
2722 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2723 		free(ifgl, M_TEMP, sizeof(*ifgl));
2724 		free(ifgm, M_TEMP, sizeof(*ifgm));
2725 		return (ENOMEM);
2726 	}
2727 
2728 	ifg->ifg_refcnt++;
2729 	ifgl->ifgl_group = ifg;
2730 	ifgm->ifgm_ifp = ifp;
2731 
2732 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2733 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2734 
2735 #if NPF > 0
2736 	pfi_group_addmember(groupname, ifp);
2737 #endif
2738 
2739 	return (0);
2740 }
2741 
2742 /*
2743  * Remove a group from an interface
2744  */
2745 int
2746 if_delgroup(struct ifnet *ifp, const char *groupname)
2747 {
2748 	struct ifg_list		*ifgl;
2749 	struct ifg_member	*ifgm;
2750 
2751 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2752 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2753 			break;
2754 	if (ifgl == NULL)
2755 		return (ENOENT);
2756 
2757 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2758 
2759 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2760 		if (ifgm->ifgm_ifp == ifp)
2761 			break;
2762 
2763 	if (ifgm != NULL) {
2764 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2765 		free(ifgm, M_TEMP, sizeof(*ifgm));
2766 	}
2767 
2768 #if NPF > 0
2769 	pfi_group_change(groupname);
2770 #endif
2771 
2772 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2773 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2774 #if NPF > 0
2775 		pfi_detach_ifgroup(ifgl->ifgl_group);
2776 #endif
2777 		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
2778 	}
2779 
2780 	free(ifgl, M_TEMP, sizeof(*ifgl));
2781 
2782 	return (0);
2783 }
2784 
2785 /*
2786  * Stores all groups from an interface in memory pointed
2787  * to by data
2788  */
2789 int
2790 if_getgroup(caddr_t data, struct ifnet *ifp)
2791 {
2792 	int			 len, error;
2793 	struct ifg_list		*ifgl;
2794 	struct ifg_req		 ifgrq, *ifgp;
2795 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2796 
2797 	if (ifgr->ifgr_len == 0) {
2798 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2799 			ifgr->ifgr_len += sizeof(struct ifg_req);
2800 		return (0);
2801 	}
2802 
2803 	len = ifgr->ifgr_len;
2804 	ifgp = ifgr->ifgr_groups;
2805 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2806 		if (len < sizeof(ifgrq))
2807 			return (EINVAL);
2808 		bzero(&ifgrq, sizeof ifgrq);
2809 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2810 		    sizeof(ifgrq.ifgrq_group));
2811 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2812 		    sizeof(struct ifg_req))))
2813 			return (error);
2814 		len -= sizeof(ifgrq);
2815 		ifgp++;
2816 	}
2817 
2818 	return (0);
2819 }
2820 
2821 /*
2822  * Stores all members of a group in memory pointed to by data
2823  */
2824 int
2825 if_getgroupmembers(caddr_t data)
2826 {
2827 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2828 	struct ifg_group	*ifg;
2829 	struct ifg_member	*ifgm;
2830 	struct ifg_req		 ifgrq, *ifgp;
2831 	int			 len, error;
2832 
2833 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2834 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2835 			break;
2836 	if (ifg == NULL)
2837 		return (ENOENT);
2838 
2839 	if (ifgr->ifgr_len == 0) {
2840 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2841 			ifgr->ifgr_len += sizeof(ifgrq);
2842 		return (0);
2843 	}
2844 
2845 	len = ifgr->ifgr_len;
2846 	ifgp = ifgr->ifgr_groups;
2847 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2848 		if (len < sizeof(ifgrq))
2849 			return (EINVAL);
2850 		bzero(&ifgrq, sizeof ifgrq);
2851 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2852 		    sizeof(ifgrq.ifgrq_member));
2853 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2854 		    sizeof(struct ifg_req))))
2855 			return (error);
2856 		len -= sizeof(ifgrq);
2857 		ifgp++;
2858 	}
2859 
2860 	return (0);
2861 }
2862 
2863 int
2864 if_getgroupattribs(caddr_t data)
2865 {
2866 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2867 	struct ifg_group	*ifg;
2868 
2869 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2870 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2871 			break;
2872 	if (ifg == NULL)
2873 		return (ENOENT);
2874 
2875 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2876 
2877 	return (0);
2878 }
2879 
/*
 * Adjust a group's carp demotion counter by the (signed) amount in
 * the request and notify every member interface of the change.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* demote is relative; the resulting counter must stay in 0..255. */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Give each member a chance to react to the new value. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2906 
2907 /*
2908  * Stores all groups in memory pointed to by data
2909  */
2910 int
2911 if_getgrouplist(caddr_t data)
2912 {
2913 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2914 	struct ifg_group	*ifg;
2915 	struct ifg_req		 ifgrq, *ifgp;
2916 	int			 len, error;
2917 
2918 	if (ifgr->ifgr_len == 0) {
2919 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2920 			ifgr->ifgr_len += sizeof(ifgrq);
2921 		return (0);
2922 	}
2923 
2924 	len = ifgr->ifgr_len;
2925 	ifgp = ifgr->ifgr_groups;
2926 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2927 		if (len < sizeof(ifgrq))
2928 			return (EINVAL);
2929 		bzero(&ifgrq, sizeof ifgrq);
2930 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2931 		    sizeof(ifgrq.ifgrq_group));
2932 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2933 		    sizeof(struct ifg_req))))
2934 			return (error);
2935 		len -= sizeof(ifgrq);
2936 		ifgp++;
2937 	}
2938 
2939 	return (0);
2940 }
2941 
/*
 * Rebuild the egress group when a default route (destination and
 * mask both "any") has changed.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* A zero-length mask also counts as a default route. */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2963 
/*
 * Rebuild the IFG_EGRESS group from scratch: its members are the
 * interfaces that currently carry a default route in rtable 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Empty the existing egress group, if there is one. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk the IPv4 default routes and re-add their interfaces. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for the IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3013 
3014 /*
3015  * Set/clear promiscuous mode on interface ifp based on the truth value
3016  * of pswitch.  The calls are reference counted so that only the first
3017  * "on" request actually has an effect, as does the final "off" request.
3018  * Results are undefined if the "off" and "on" requests are not matched.
3019  */
3020 int
3021 ifpromisc(struct ifnet *ifp, int pswitch)
3022 {
3023 	struct ifreq ifr;
3024 	unsigned short oif_flags;
3025 	int oif_pcount, error;
3026 
3027 	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */
3028 
3029 	oif_flags = ifp->if_flags;
3030 	oif_pcount = ifp->if_pcount;
3031 	if (pswitch) {
3032 		if (ifp->if_pcount++ != 0)
3033 			return (0);
3034 		ifp->if_flags |= IFF_PROMISC;
3035 	} else {
3036 		if (--ifp->if_pcount > 0)
3037 			return (0);
3038 		ifp->if_flags &= ~IFF_PROMISC;
3039 	}
3040 
3041 	if ((ifp->if_flags & IFF_UP) == 0)
3042 		return (0);
3043 
3044 	memset(&ifr, 0, sizeof(ifr));
3045 	ifr.ifr_flags = ifp->if_flags;
3046 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
3047 	if (error) {
3048 		ifp->if_flags = oif_flags;
3049 		ifp->if_pcount = oif_pcount;
3050 	}
3051 
3052 	return (error);
3053 }
3054 
/*
 * Append an address to the interface's address list.
 */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3060 
/*
 * Remove an address from the interface's address list.
 */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3066 
/*
 * Overwrite an address's broadcast address in place; only a
 * same-length replacement is supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3074 
#ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every INET/INET6 address on every interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			/* Other address families print only " on ifname". */
			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3106 
/*
 * React to a changed link-layer address: cycle IFF_UP through
 * SIOCSIFFLAGS so the driver reinitializes with the new address,
 * and regenerate the IPv6 link-local address when not forwarding.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...then up, so the driver reprograms its address filter. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		/*
		 * NOTE(review): in6ifa_ifpforlinklocal() may return NULL;
		 * this relies on ia_ifa being the first member so &NULL->
		 * ia_ifa stays NULL — confirm against struct in6_ifaddr.
		 */
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3153 
/*
 * Register a task on the interface's address-hook list, run via
 * if_addrhooks_run().
 */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3161 
/*
 * Unregister a task from the interface's address-hook list.
 */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3169 
/*
 * Run every registered address hook for this interface.
 */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3175 
3176 void
3177 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3178 {
3179 	extern int ticks;
3180 
3181 	memset(rxr, 0, sizeof(*rxr));
3182 
3183 	rxr->rxr_adjusted = ticks;
3184 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3185 	rxr->rxr_hwm = hwm;
3186 }
3187 
3188 static inline void
3189 if_rxr_adjust_cwm(struct if_rxring *rxr)
3190 {
3191 	extern int ticks;
3192 
3193 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3194 		return;
3195 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3196 		rxr->rxr_cwm++;
3197 
3198 	rxr->rxr_adjusted = ticks;
3199 }
3200 
3201 void
3202 if_rxr_livelocked(struct if_rxring *rxr)
3203 {
3204 	extern int ticks;
3205 
3206 	if (ticks - rxr->rxr_adjusted >= 1) {
3207 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3208 			rxr->rxr_cwm--;
3209 
3210 		rxr->rxr_adjusted = ticks;
3211 	}
3212 }
3213 
/*
 * Grant up to max new ring slots, bounded by the current watermark.
 * Returns how many slots the caller may fill.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* Already at (or above) the watermark: nothing to hand out. */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;	/* account for the slots handed out */

	return (diff);
}
3233 
/*
 * Copy up to t rxring info entries from e out to the userland
 * if_rxrinfo at uifri, clamped to the space the caller provided.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Copy at most as many entries as the user buffer holds... */
	n = min(t, kifri.ifri_total);
	/* ...but always report the real total so callers can resize. */
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3256 
/*
 * Convenience wrapper for drivers with a single receive ring: build
 * one if_rxring_info entry and hand it to if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3273 
3274 /*
3275  * Network stack input queues.
3276  */
3277 
3278 void
3279 niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
3280 {
3281 	mq_init(&niq->ni_q, maxlen, IPL_NET);
3282 	niq->ni_isr = isr;
3283 }
3284 
3285 int
3286 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3287 {
3288 	int rv;
3289 
3290 	rv = mq_enqueue(&niq->ni_q, m);
3291 	if (rv == 0)
3292 		schednetisr(niq->ni_isr);
3293 	else
3294 		if_congestion();
3295 
3296 	return (rv);
3297 }
3298 
3299 int
3300 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3301 {
3302 	int rv;
3303 
3304 	rv = mq_enlist(&niq->ni_q, ml);
3305 	if (rv == 0)
3306 		schednetisr(niq->ni_isr);
3307 	else
3308 		if_congestion();
3309 
3310 	return (rv);
3311 }
3312 
/*
 * Catch-all for address families a caller does not implement; this
 * is a programming error, so panic.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3318 
3319 /*
3320  * XXXSMP This tunable is here to work around the fact that IPsec
3321  * globals aren't ready to be accessed by multiple threads in
3322  * parallel.
3323  */
3324 int		 nettaskqs = NET_TASKQ;
3325 
3326 struct taskq *
3327 net_tq(unsigned int ifindex)
3328 {
3329 	struct taskq *t = NULL;
3330 
3331 	t = nettqmp[ifindex % nettaskqs];
3332 
3333 	return (t);
3334 }
3335