xref: /openbsd/sys/net/if.c (revision defbe25c)
1 /*	$OpenBSD: if.c,v 1.721 2024/10/17 05:02:12 jsg Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "ppp.h"
70 #include "pppoe.h"
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/mbuf.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/timeout.h>
78 #include <sys/protosw.h>
79 #include <sys/kernel.h>
80 #include <sys/ioctl.h>
81 #include <sys/domain.h>
82 #include <sys/task.h>
83 #include <sys/atomic.h>
84 #include <sys/percpu.h>
85 #include <sys/proc.h>
86 #include <sys/stdint.h>	/* uintptr_t */
87 #include <sys/rwlock.h>
88 #include <sys/smr.h>
89 
90 #include <net/if.h>
91 #include <net/if_dl.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/netisr.h>
95 
96 #include "vlan.h"
97 #if NVLAN > 0
98 #include <net/if_vlan_var.h>
99 #endif
100 
101 #include <netinet/in.h>
102 #include <netinet/if_ether.h>
103 #include <netinet/igmp.h>
104 #ifdef MROUTING
105 #include <netinet/ip_mroute.h>
106 #endif
107 #include <netinet/tcp.h>
108 #include <netinet/tcp_timer.h>
109 #include <netinet/tcp_var.h>
110 
111 #ifdef INET6
112 #include <netinet6/in6_var.h>
113 #include <netinet6/in6_ifattach.h>
114 #include <netinet6/nd6.h>
115 #include <netinet/ip6.h>
116 #include <netinet6/ip6_var.h>
117 #endif
118 
119 #ifdef MPLS
120 #include <netmpls/mpls.h>
121 #endif
122 
123 #if NBPFILTER > 0
124 #include <net/bpf.h>
125 #endif
126 
127 #if NBRIDGE > 0
128 #include <net/if_bridge.h>
129 #endif
130 
131 #if NCARP > 0
132 #include <netinet/ip_carp.h>
133 #endif
134 
135 #if NPF > 0
136 #include <net/pfvar.h>
137 #endif
138 
139 #include <sys/device.h>
140 
141 void	if_attachsetup(struct ifnet *);
142 void	if_attach_common(struct ifnet *);
143 void	if_remove(struct ifnet *);
144 int	if_createrdomain(int, struct ifnet *);
145 int	if_setrdomain(struct ifnet *, int);
146 void	if_slowtimo(void *);
147 
148 void	if_detached_qstart(struct ifqueue *);
149 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
150 
151 int	ifioctl_get(u_long, caddr_t);
152 int	ifconf(caddr_t);
153 static int
154 	if_sffpage_check(const caddr_t);
155 
156 int	if_getgroup(caddr_t, struct ifnet *);
157 int	if_getgroupmembers(caddr_t);
158 int	if_getgroupattribs(caddr_t);
159 int	if_setgroupattribs(caddr_t);
160 int	if_getgrouplist(caddr_t);
161 
162 void	if_linkstate(struct ifnet *);
163 void	if_linkstate_task(void *);
164 
165 int	if_clone_list(struct if_clonereq *);
166 struct if_clone	*if_clone_lookup(const char *, int *);
167 
168 int	if_group_egress_build(void);
169 
170 void	if_watchdog_task(void *);
171 
172 void	if_netisr(void *);
173 
174 #ifdef DDB
175 void	ifa_print_all(void);
176 #endif
177 
178 void	if_qstart_compat(struct ifqueue *);
179 
180 /*
181  * interface index map
182  *
183  * the kernel maintains a mapping of interface indexes to struct ifnet
184  * pointers.
185  *
186  * the map is an array of struct ifnet pointers prefixed by an if_map
187  * structure. the if_map structure stores the length of its array.
188  *
189  * as interfaces are attached to the system, the map is grown on demand
190  * up to USHRT_MAX entries.
191  *
192  * interface index 0 is reserved and represents no interface. this
193  * supports the use of the interface index as the scope for IPv6 link
194  * local addresses, where scope 0 means no scope has been specified.
195  * it also supports the use of interface index as the unique identifier
196  * for network interfaces in SNMP applications as per RFC2863. therefore
197  * if_get(0) returns NULL.
198  */
199 
200 struct ifnet *if_ref(struct ifnet *);
201 
202 /*
203  * struct if_idxmap
204  *
205  * infrastructure to manage updates and accesses to the current if_map.
206  *
207  * interface index 0 is special and represents "no interface", so we
208  * use the 0th slot in map to store the length of the array.
209  */
210 
struct if_idxmap {
	unsigned int		  serial;	/* next index to hand out */
	unsigned int		  count;	/* interfaces currently allocated */
	struct ifnet		**map;		/* SMR protected */
	struct rwlock		  lock;		/* serializes map/usedidx updates */
	unsigned char		 *usedidx;	/* bitmap of indices in use */
};
218 
/*
 * Carrier used to defer freeing a replaced interface map until all
 * SMR read sections that may still reference it have drained.  The
 * old usedidx allocation is recycled as this structure (see
 * if_idxmap_usedidx_size() and if_idxmap_alloc()).
 */
struct if_idxmap_dtor {
	struct smr_entry	  smr;	/* deferred-free callback entry */
	struct ifnet		**map;	/* old map to release */
};
223 
224 void	if_idxmap_init(unsigned int);
225 void	if_idxmap_free(void *);
226 void	if_idxmap_alloc(struct ifnet *);
227 void	if_idxmap_insert(struct ifnet *);
228 void	if_idxmap_remove(struct ifnet *);
229 
TAILQ_HEAD(, ifg_group) ifg_head =
    TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */

LIST_HEAD(, if_clone) if_cloners =
    LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
int if_cloners_count;	/* [I] number of clonable interfaces */

struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

int	ifq_congestion;		/* set when interface queues are congested */

int		 netisr;	/* bitmask of pending legacy soft interrupts */

/* one softnet thread: its name and the taskq doing the work */
struct softnet {
	char		 sn_name[16];
	struct taskq	*sn_taskq;
};

#define	NET_TASKQ	4	/* number of softnet taskqs */
struct softnet	softnets[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
262 
263 /*
264  * Network interface utility routines.
265  */
/*
 * Initialize global interface state: the interface index map and the
 * softnet taskqs used for network packet processing.  Called once at
 * boot before any interface attaches.  Panics if a taskq cannot be
 * created.
 */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */

	for (i = 0; i < NET_TASKQ; i++) {
		struct softnet *sn = &softnets[i];
		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
		    TASKQ_MPSAFE);
		if (sn->sn_taskq == NULL)
			panic("unable to create network taskq %d", i);
	}
}
286 
/* the single global interface index map; see struct if_idxmap above */
static struct if_idxmap if_idxmap;

/*
 * XXXSMP: For `ifnetlist' modification both kernel and net locks
 * should be taken. For read-only access only one lock of them required.
 */
struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
294 
/*
 * Return the number of slots in an interface map.  Slot 0 never
 * holds an interface pointer; it stores the array length cast to a
 * pointer, which is recovered here.
 */
static inline unsigned int
if_idxmap_limit(struct ifnet **map)
{
	uintptr_t nslots = (uintptr_t)map[0];

	return (nslots);
}
300 
301 static inline size_t
if_idxmap_usedidx_size(unsigned int limit)302 if_idxmap_usedidx_size(unsigned int limit)
303 {
304 	return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
305 }
306 
/*
 * Set up the initial interface index map with room for `limit'
 * interfaces.  Runs once from ifinit(), before anything can race
 * with it.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct ifnet **if_map;

	rw_init(&if_idxmap.lock, "idxmaplk");
	if_idxmap.serial = 1; /* skip ifidx 0 */

	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
	    M_WAITOK | M_ZERO);

	/* slot 0 never holds an interface; it stores the array length */
	if_map[0] = (struct ifnet *)(uintptr_t)limit;

	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
	    M_IFADDR, M_WAITOK | M_ZERO);
	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */

	/* this is called early so there's nothing to race with */
	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
}
327 
/*
 * Reserve an interface index for ifp and initialize its refcount.
 * If the next index does not fit, the map is doubled and the old one
 * is released through SMR once readers have drained.  The chosen
 * index is marked in the usedidx bitmap and stored in ifp->if_index;
 * the map slot itself is only filled later by if_idxmap_insert().
 */
void
if_idxmap_alloc(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int limit;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	rw_enter_write(&if_idxmap.lock);

	if (++if_idxmap.count >= USHRT_MAX)
		panic("too many interfaces");

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
	limit = if_idxmap_limit(if_map);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= limit) {
		/* grow the map by doubling it */
		struct if_idxmap_dtor *dtor;
		struct ifnet **oif_map;
		unsigned int olimit;
		unsigned char *nusedidx;

		oif_map = if_map;
		olimit = limit;

		limit = olimit * 2;
		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
		    M_WAITOK | M_ZERO);
		if_map[0] = (struct ifnet *)(uintptr_t)limit;

		/* copy live entries, taking a reference for the new map */
		for (i = 1; i < olimit; i++) {
			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
			if (oifp == NULL)
				continue;

			/*
			 * the new if_map isn't visible yet, so we don't
			 * need SMR_PTR_SET_LOCKED and its membar.
			 */
			if_map[i] = if_ref(oifp);
		}

		nusedidx = malloc(if_idxmap_usedidx_size(limit),
		    M_IFADDR, M_WAITOK | M_ZERO);
		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));

		/* use the old usedidx bitmap as an smr_entry for the if_map */
		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
		if_idxmap.usedidx = nusedidx;

		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);

		dtor->map = oif_map;
		smr_init(&dtor->smr);
		smr_call(&dtor->smr, if_idxmap_free, dtor);
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && isclr(if_idxmap.usedidx, index))
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}
	KASSERT(index != 0 && index < limit);
	KASSERT(isclr(if_idxmap.usedidx, index));

	setbit(if_idxmap.usedidx, index);
	ifp->if_index = index;

	rw_exit_write(&if_idxmap.lock);
}
403 
/*
 * SMR callback: release an old interface map once no reader can still
 * be traversing it.  Drops the reference taken on each interface when
 * the map was copied, then frees the map and the dtor carrier (which
 * is the old usedidx allocation, see if_idxmap_alloc()).
 */
void
if_idxmap_free(void *arg)
{
	struct if_idxmap_dtor *dtor = arg;
	struct ifnet **oif_map = dtor->map;
	unsigned int olimit = if_idxmap_limit(oif_map);
	unsigned int i;

	for (i = 1; i < olimit; i++)
		if_put(oif_map[i]);

	free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
	free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
}
418 
/*
 * Publish ifp in the index map at the slot reserved earlier by
 * if_idxmap_alloc(), taking a reference that is held for as long as
 * the interface stays in the map.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
	    "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
	    if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
	KASSERT(isset(if_idxmap.usedidx, index));

	/* commit */
	SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));

	rw_exit_write(&if_idxmap.lock);
}
440 
/*
 * Unpublish ifp from the index map and recycle its index.  After the
 * SMR grace period no reader can see the old slot any more, so the
 * reference the map held is dropped.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
	KASSERT(isset(if_idxmap.usedidx, index));

	SMR_PTR_SET_LOCKED(&if_map[index], NULL);

	if_idxmap.count--;
	clrbit(if_idxmap.usedidx, index);
	/* end of if_idxmap modifications */

	rw_exit_write(&if_idxmap.lock);

	/* wait until no SMR reader can still hold the old slot value */
	smr_barrier();
	if_put(ifp);
}
466 
/*
 * Attach an interface to the
 * list of "active" interfaces.
 * Final stage of attachment: group membership, per-protocol attach,
 * slow timer, index-map publication, task setup and routing-socket
 * announcement.  Called with the net lock held.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	if_addgroup(ifp, IFG_ALL);

#ifdef INET6
	nd6_ifattach(ifp);
#endif

#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/* the tasks get the interface index, not the pointer, as argument */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
502 
/*
 * Allocate the link level name for the specified interface.  This
 * is an attachment helper.  It must be called after ifp->if_addrlen
 * is initialized, which may not be the case when if_attach() is
 * called.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	/* round the allocation up to a multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
540 
541 /*
542  * Free the link level name for the specified interface.  This is
543  * a detach helper.  This is called from if_detach() or from
544  * link layer type specific detach functions.
545  */
546 void
if_free_sadl(struct ifnet * ifp)547 if_free_sadl(struct ifnet *ifp)
548 {
549 	if (ifp->if_sadl == NULL)
550 		return;
551 
552 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
553 	ifp->if_sadl = NULL;
554 }
555 
/*
 * Like if_attach(), but inserts the interface at the head of
 * ifnetlist instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
565 
/*
 * Attach ifp to the system: common initialization, insertion at the
 * tail of ifnetlist, then final setup and announcement under the net
 * lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
575 
576 void
if_attach_queues(struct ifnet * ifp,unsigned int nqs)577 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
578 {
579 	struct ifqueue **map;
580 	struct ifqueue *ifq;
581 	int i;
582 
583 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
584 	KASSERT(nqs != 0);
585 
586 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
587 
588 	ifp->if_snd.ifq_softc = NULL;
589 	map[0] = &ifp->if_snd;
590 
591 	for (i = 1; i < nqs; i++) {
592 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
593 		ifq_init_maxlen(ifq, ifp->if_snd.ifq_maxlen);
594 		ifq_init(ifq, ifp, i);
595 		map[i] = ifq;
596 	}
597 
598 	ifp->if_ifqs = map;
599 	ifp->if_nifqs = nqs;
600 }
601 
/*
 * Give ifp `niqs' receive queues.  Slot 0 of the new queue map reuses
 * the builtin if_rcv queue; the remaining queues are allocated here.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
625 
/*
 * Initialization shared by if_attach() and if_attachhead(): address
 * and group lists, transmit/receive queue setup, interface index
 * allocation and default method pointers.  The driver must have set
 * if_ioctl, and either if_start (legacy) or IFXF_MPSAFE together
 * with if_qstart.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		/* legacy driver: route qstart through the compat shim */
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	if_idxmap_alloc(ifp);

	/* start with a single builtin transmit queue */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* and a single builtin receive queue */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* fill in defaults for methods the driver left unset */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
676 
/*
 * Switch the packet scheduler (ifq_ops) on ifp's transmit side.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
691 
/*
 * Legacy transmit kick for drivers without IFXF_MPSAFE; goes through
 * the compat wrapper on the single builtin queue.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy if_start routines expect the big lock and splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
719 
/*
 * Queue an outgoing packet on ifp, first diverting it through pf
 * delay or bridge processing when applicable.  Consumes the mbuf;
 * returns 0 or an errno from the underlying enqueue method.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	/* receive timestamps must not leak into the output path */
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets already processed by the bridge */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
745 
746 int
if_enqueue_ifq(struct ifnet * ifp,struct mbuf * m)747 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
748 {
749 	struct ifqueue *ifq = &ifp->if_snd;
750 	int error;
751 
752 	if (ifp->if_nifqs > 1) {
753 		unsigned int idx;
754 
755 		/*
756 		 * use the operations on the first ifq to pick which of
757 		 * the array gets this mbuf.
758 		 */
759 
760 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
761 		ifq = ifp->if_ifqs[idx];
762 	}
763 
764 	error = ifq_enqueue(ifq, m);
765 	if (error)
766 		return (error);
767 
768 	ifq_start(ifq);
769 
770 	return (0);
771 }
772 
/*
 * Default receive path: hand a list of received packets to the
 * interface's first receive queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
778 
/*
 * Feed a packet back into the local protocol stack as if it had been
 * received on ifp.  Used for loopback output and to duplicate packets
 * on SIMPLEX interfaces.  Consumes the mbuf; returns 0 or an errno.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags, keepcksum;
	uint16_t keepmss;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	/*
	 * Preserve outgoing checksum flags, in case the packet is
	 * forwarded to another interface.  Then the checksum, which
	 * is now incorrect, will be calculated before sending.
	 */
	keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
	    M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
	    M_TCP_TSO);
	keepmss = m->m_pkthdr.ph_mss;
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.csum_flags = keepcksum;
	m->m_pkthdr.ph_mss = keepmss;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/*
	 * An oversized TSO packet can only be looped back if the
	 * interface could have segmented it; otherwise drop it.
	 */
	if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
		if (ifp->if_mtu > 0 &&
		    ((af == AF_INET &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
		    (af == AF_INET6 &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
			tcpstat_inc(tcps_inswlro);
			tcpstat_add(tcps_inpktlro,
			    (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
		} else {
			tcpstat_inc(tcps_inbadlro);
			m_freem(m);
			return (EPROTONOSUPPORT);
		}
	}

	/* locally looped packets need no checksum verification on input */
	if (ISSET(keepcksum, M_TCP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_UDP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_ICMP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_IN_OK;

	/* do not count multicast loopback and simplex interfaces */
	if (ISSET(ifp->if_flags, IFF_LOOPBACK)) {
		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
		    m->m_pkthdr.len);
	}

	switch (af) {
	case AF_INET:
		if (ISSET(keepcksum, M_IPV4_CSUM_OUT))
			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
870 
871 int
if_output_ml(struct ifnet * ifp,struct mbuf_list * ml,struct sockaddr * dst,struct rtentry * rt)872 if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
873     struct sockaddr *dst, struct rtentry *rt)
874 {
875 	struct mbuf *m;
876 	int error = 0;
877 
878 	while ((m = ml_dequeue(ml)) != NULL) {
879 		error = ifp->if_output(ifp, m, dst, rt);
880 		if (error)
881 			break;
882 	}
883 	if (error)
884 		ml_purge(ml);
885 
886 	return error;
887 }
888 
/*
 * Output helper for TCP packets that may need software segmentation.
 * On return with *mp == NULL the packet has been consumed (sent or
 * freed); otherwise *mp still has to be fragmented or dropped by the
 * caller.  Returns 0 or an errno.
 */
int
if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
    struct rtentry *rt, u_int mtu)
{
	uint32_t ifcap;
	int error;

	switch (dst->sa_family) {
	case AF_INET:
		ifcap = IFCAP_TSOv4;
		break;
#ifdef INET6
	case AF_INET6:
		ifcap = IFCAP_TSOv6;
		break;
#endif
	default:
		unhandled_af(dst->sa_family);
	}

	/*
	 * Try to send with TSO first.  When forwarding LRO may set
	 * maximum segment size in mbuf header.  Chop TCP segment
	 * even if it would fit interface MTU to preserve maximum
	 * path MTU.
	 */
	error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
	if (error || *mp == NULL)
		return error;

	if ((*mp)->m_pkthdr.len <= mtu) {
		/* small enough to go out directly; finalize checksums */
		switch (dst->sa_family) {
		case AF_INET:
			in_hdr_cksum_out(*mp, ifp);
			in_proto_cksum_out(*mp, ifp);
			break;
#ifdef INET6
		case AF_INET6:
			in6_proto_cksum_out(*mp, ifp);
			break;
#endif
		}
		error = ifp->if_output(ifp, *mp, dst, rt);
		*mp = NULL;
		return error;
	}

	/* mp still contains mbuf that has to be fragmented or dropped. */
	return 0;
}
939 
/*
 * Send all packets queued on mq, keeping the external packet counter
 * `total' in sync.  Packets that reappear on the queue while sending
 * (another CPU re-enqueued) are discarded.  Returns 0 or the first
 * output error.
 */
int
if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
    struct sockaddr *dst, struct rtentry *rt)
{
	struct mbuf_list ml;
	unsigned int len;
	int error;

	mq_delist(mq, &ml);
	len = ml_len(&ml);
	error = if_output_ml(ifp, &ml, dst, rt);

	/* XXXSMP we also discard if other CPU enqueues */
	if (mq_len(mq) > 0) {
		/* mbuf is back in queue. Discard. */
		atomic_sub_int(total, len + mq_purge(mq));
	} else
		atomic_sub_int(total, len);

	return error;
}
961 
962 int
if_output_local(struct ifnet * ifp,struct mbuf * m,sa_family_t af)963 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
964 {
965 	struct ifiqueue *ifiq;
966 	unsigned int flow = 0;
967 
968 	m->m_pkthdr.ph_family = af;
969 	m->m_pkthdr.ph_ifidx = ifp->if_index;
970 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
971 
972 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
973 		flow = m->m_pkthdr.ph_flowid;
974 
975 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
976 
977 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
978 }
979 
/*
 * Deliver a list of received packets to the stack one at a time under
 * the shared netlock.  Also stirs the random pool with queue metadata
 * for physical interfaces.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the shared netlock for packet processing in the softnet
	 * threads.  Packets can regrab the exclusive lock via queues.
	 * ioctl, sysctl, and socket syscall may use shared lock if access is
	 * read only or MP safe.  Usually they hold the exclusive net lock.
	 */

	NET_LOCK_SHARED();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK_SHARED();
}
1003 
/*
 * Input path that delivers a single mbuf in the caller's context
 * rather than through a receive queue (used by virtual interfaces,
 * hence the name).  Handles input accounting, pf address change and
 * the bpf tap; drops the packet on monitor interfaces.  Consumes the
 * mbuf.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means the bpf filter consumed it */
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
		(*ifp->if_input)(ifp, m);
	else
		m_freem(m);
}
1036 
/*
 * Task that drains the legacy netisr soft-interrupt bits under the
 * exclusive netlock.  Loops until no bits remain, yielding the CPU
 * (and the lock) when the scheduler asks for it.  Protocols that are
 * not yet MP safe run their handlers under the kernel lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP))
			arpintr();
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX))
			pipexintr();
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		t |= n;
	}

	NET_UNLOCK();
}
1091 
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };	/* position marker */
	void (*func)(void *);
	void *arg;

	/*
	 * Run every task on a hook list.  The list is protected by
	 * if_hooks_mtx, but the hooks themselves must run without the
	 * mutex held, so a stack-allocated cursor (recognized by
	 * t_func == NULL) keeps our place while the mutex is dropped
	 * around each callback.  Cursors left by concurrent walkers
	 * are skipped.
	 */
	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		/* Mark our position, then run the hook unlocked. */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
1120 
/*
 * Make an interface unreachable for new lookups and wait until all
 * outstanding references have been released.  After this returns no
 * other CPU holds a reference to ifp.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
1135 
/*
 * Undo pseudo-driver attachments by running the interface's detach
 * hooks under the net lock.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1149 
/*
 * Register a task to run when ifp is detached.  Inserted at the head
 * so that if_deactivate() runs hooks in reverse registration order.
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1157 
/*
 * Unregister a previously added detach hook.  The task must still be
 * on the list.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1165 
1166 /*
1167  * Detach an interface from everything in the kernel.  Also deallocate
1168  * private resources.
1169  */
1170 void
if_detach(struct ifnet * ifp)1171 if_detach(struct ifnet *ifp)
1172 {
1173 	struct ifaddr *ifa;
1174 	struct ifg_list *ifg;
1175 	int i, s;
1176 
1177 	/* Undo pseudo-driver changes. */
1178 	if_deactivate(ifp);
1179 
1180 	/* Other CPUs must not have a reference before we start destroying. */
1181 	if_remove(ifp);
1182 
1183 	ifp->if_qstart = if_detached_qstart;
1184 
1185 	/* Wait until the start routines finished. */
1186 	ifq_barrier(&ifp->if_snd);
1187 	ifq_clr_oactive(&ifp->if_snd);
1188 
1189 #if NBPFILTER > 0
1190 	bpfdetach(ifp);
1191 #endif
1192 
1193 	NET_LOCK();
1194 	s = splnet();
1195 	ifp->if_ioctl = if_detached_ioctl;
1196 	ifp->if_watchdog = NULL;
1197 
1198 	/* Remove the watchdog timeout & task */
1199 	timeout_del(&ifp->if_slowtimo);
1200 	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1201 
1202 	/* Remove the link state task */
1203 	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1204 
1205 	rti_delete(ifp);
1206 #if NETHER > 0 && defined(NFSCLIENT)
1207 	if (ifp->if_index == revarp_ifidx)
1208 		revarp_ifidx = 0;
1209 #endif
1210 #ifdef MROUTING
1211 	vif_delete(ifp);
1212 #endif
1213 	in_ifdetach(ifp);
1214 #ifdef INET6
1215 	in6_ifdetach(ifp);
1216 #endif
1217 #if NPF > 0
1218 	pfi_detach_ifnet(ifp);
1219 #endif
1220 
1221 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1222 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1223 
1224 	if_free_sadl(ifp);
1225 
1226 	/* We should not have any address left at this point. */
1227 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1228 #ifdef DIAGNOSTIC
1229 		printf("%s: address list non empty\n", ifp->if_xname);
1230 #endif
1231 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1232 			ifa_del(ifp, ifa);
1233 			ifa->ifa_ifp = NULL;
1234 			ifafree(ifa);
1235 		}
1236 	}
1237 	splx(s);
1238 	NET_UNLOCK();
1239 
1240 	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
1241 	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
1242 	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));
1243 
1244 #ifdef INET6
1245 	nd6_ifdetach(ifp);
1246 #endif
1247 
1248 	/* Announce that the interface is gone. */
1249 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1250 
1251 	if (ifp->if_counters != NULL)
1252 		if_counters_free(ifp);
1253 
1254 	for (i = 0; i < ifp->if_nifqs; i++)
1255 		ifq_destroy(ifp->if_ifqs[i]);
1256 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1257 		for (i = 1; i < ifp->if_nifqs; i++) {
1258 			free(ifp->if_ifqs[i], M_DEVBUF,
1259 			    sizeof(struct ifqueue));
1260 		}
1261 		free(ifp->if_ifqs, M_DEVBUF,
1262 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1263 	}
1264 
1265 	for (i = 0; i < ifp->if_niqs; i++)
1266 		ifiq_destroy(ifp->if_iqs[i]);
1267 	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
1268 		for (i = 1; i < ifp->if_niqs; i++) {
1269 			free(ifp->if_iqs[i], M_DEVBUF,
1270 			    sizeof(struct ifiqueue));
1271 		}
1272 		free(ifp->if_iqs, M_DEVBUF,
1273 		    sizeof(struct ifiqueue *) * ifp->if_niqs);
1274 	}
1275 }
1276 
1277 /*
1278  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1279  */
1280 int
if_isconnected(const struct ifnet * ifp0,unsigned int ifidx)1281 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1282 {
1283 	struct ifnet *ifp;
1284 	int connected = 0;
1285 
1286 	ifp = if_get(ifidx);
1287 	if (ifp == NULL)
1288 		return (0);
1289 
1290 	if (ifp0->if_index == ifp->if_index)
1291 		connected = 1;
1292 
1293 #if NBRIDGE > 0
1294 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1295 		connected = 1;
1296 #endif
1297 #if NCARP > 0
1298 	if ((ifp0->if_type == IFT_CARP &&
1299 	    ifp0->if_carpdevidx == ifp->if_index) ||
1300 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1301 		connected = 1;
1302 #endif
1303 
1304 	if_put(ifp);
1305 	return (connected);
1306 }
1307 
1308 /*
1309  * Create a clone network interface.
1310  */
1311 int
if_clone_create(const char * name,int rdomain)1312 if_clone_create(const char *name, int rdomain)
1313 {
1314 	struct if_clone *ifc;
1315 	struct ifnet *ifp;
1316 	int unit, ret;
1317 
1318 	ifc = if_clone_lookup(name, &unit);
1319 	if (ifc == NULL)
1320 		return (EINVAL);
1321 
1322 	rw_enter_write(&if_cloners_lock);
1323 
1324 	if ((ifp = if_unit(name)) != NULL) {
1325 		ret = EEXIST;
1326 		goto unlock;
1327 	}
1328 
1329 	ret = (*ifc->ifc_create)(ifc, unit);
1330 
1331 	if (ret != 0 || (ifp = if_unit(name)) == NULL)
1332 		goto unlock;
1333 
1334 	NET_LOCK();
1335 	if_addgroup(ifp, ifc->ifc_name);
1336 	if (rdomain != 0)
1337 		if_setrdomain(ifp, rdomain);
1338 	NET_UNLOCK();
1339 unlock:
1340 	rw_exit_write(&if_cloners_lock);
1341 	if_put(ifp);
1342 
1343 	return (ret);
1344 }
1345 
1346 /*
1347  * Destroy a clone network interface.
1348  */
1349 int
if_clone_destroy(const char * name)1350 if_clone_destroy(const char *name)
1351 {
1352 	struct if_clone *ifc;
1353 	struct ifnet *ifp;
1354 	int ret;
1355 
1356 	ifc = if_clone_lookup(name, NULL);
1357 	if (ifc == NULL)
1358 		return (EINVAL);
1359 
1360 	if (ifc->ifc_destroy == NULL)
1361 		return (EOPNOTSUPP);
1362 
1363 	rw_enter_write(&if_cloners_lock);
1364 
1365 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1366 		if (strcmp(ifp->if_xname, name) == 0)
1367 			break;
1368 	}
1369 	if (ifp == NULL) {
1370 		rw_exit_write(&if_cloners_lock);
1371 		return (ENXIO);
1372 	}
1373 
1374 	NET_LOCK();
1375 	if (ifp->if_flags & IFF_UP) {
1376 		int s;
1377 		s = splnet();
1378 		if_down(ifp);
1379 		splx(s);
1380 	}
1381 	NET_UNLOCK();
1382 	ret = (*ifc->ifc_destroy)(ifp);
1383 
1384 	rw_exit_write(&if_cloners_lock);
1385 
1386 	return (ret);
1387 }
1388 
1389 /*
1390  * Look up a network interface cloner.
1391  */
1392 struct if_clone *
if_clone_lookup(const char * name,int * unitp)1393 if_clone_lookup(const char *name, int *unitp)
1394 {
1395 	struct if_clone *ifc;
1396 	const char *cp;
1397 	int unit;
1398 
1399 	/* separate interface name from unit */
1400 	for (cp = name;
1401 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1402 	    cp++)
1403 		continue;
1404 
1405 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1406 		return (NULL);	/* No name or unit number */
1407 
1408 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1409 		return (NULL);	/* unit number 0 padded */
1410 
1411 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1412 		if (strlen(ifc->ifc_name) == cp - name &&
1413 		    !strncmp(name, ifc->ifc_name, cp - name))
1414 			break;
1415 	}
1416 
1417 	if (ifc == NULL)
1418 		return (NULL);
1419 
1420 	unit = 0;
1421 	while (cp - name < IFNAMSIZ && *cp) {
1422 		if (*cp < '0' || *cp > '9' ||
1423 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1424 			/* Bogus unit number. */
1425 			return (NULL);
1426 		}
1427 		unit = (unit * 10) + (*cp++ - '0');
1428 	}
1429 
1430 	if (unitp != NULL)
1431 		*unitp = unit;
1432 	return (ifc);
1433 }
1434 
1435 /*
1436  * Register a network interface cloner.
1437  */
1438 void
if_clone_attach(struct if_clone * ifc)1439 if_clone_attach(struct if_clone *ifc)
1440 {
1441 	/*
1442 	 * we are called at kernel boot by main(), when pseudo devices are
1443 	 * being attached. The main() is the only guy which may alter the
1444 	 * if_cloners. While system is running and main() is done with
1445 	 * initialization, the if_cloners becomes immutable.
1446 	 */
1447 	KASSERT(pdevinit_done == 0);
1448 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1449 	if_cloners_count++;
1450 }
1451 
1452 /*
1453  * Provide list of interface cloners to userspace.
1454  */
1455 int
if_clone_list(struct if_clonereq * ifcr)1456 if_clone_list(struct if_clonereq *ifcr)
1457 {
1458 	char outbuf[IFNAMSIZ], *dst;
1459 	struct if_clone *ifc;
1460 	int count, error = 0;
1461 
1462 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1463 		/* Just asking how many there are. */
1464 		ifcr->ifcr_total = if_cloners_count;
1465 		return (0);
1466 	}
1467 
1468 	if (ifcr->ifcr_count < 0)
1469 		return (EINVAL);
1470 
1471 	ifcr->ifcr_total = if_cloners_count;
1472 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1473 
1474 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1475 		if (count == 0)
1476 			break;
1477 		bzero(outbuf, sizeof outbuf);
1478 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1479 		error = copyout(outbuf, dst, IFNAMSIZ);
1480 		if (error)
1481 			break;
1482 		count--;
1483 		dst += IFNAMSIZ;
1484 	}
1485 
1486 	return (error);
1487 }
1488 
1489 /*
1490  * set queue congestion marker
1491  */
1492 void
if_congestion(void)1493 if_congestion(void)
1494 {
1495 	extern int ticks;
1496 
1497 	ifq_congestion = ticks;
1498 }
1499 
/*
 * Return non-zero if a queue congestion marker was set within the
 * last hz/100 ticks.
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* Marker is in the future (tick wrap); reset it and report
		 * no congestion. */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1514 
/* Byte-wise sockaddr equality over a1's sa_len bytes. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)

/*
 * Locate an interface based on a complete address.
 * Only interfaces in the routing domain of ``rtableid'' are
 * considered.  Returns the matching ifaddr or NULL.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rtableid);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			if (equal(addr, ifa->ifa_addr)) {
				return (ifa);
			}
		}
	}
	return (NULL);
}
1547 
1548 /*
1549  * Locate the point to point interface with a given destination address.
1550  */
1551 struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr * addr,u_int rdomain)1552 ifa_ifwithdstaddr(const struct sockaddr *addr, u_int rdomain)
1553 {
1554 	struct ifnet *ifp;
1555 	struct ifaddr *ifa;
1556 
1557 	NET_ASSERT_LOCKED();
1558 
1559 	rdomain = rtable_l2(rdomain);
1560 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1561 		if (ifp->if_rdomain != rdomain)
1562 			continue;
1563 		if (ifp->if_flags & IFF_POINTOPOINT) {
1564 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1565 				if (ifa->ifa_addr->sa_family !=
1566 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1567 					continue;
1568 				if (equal(addr, ifa->ifa_dstaddr)) {
1569 					return (ifa);
1570 				}
1571 			}
1572 		}
1573 	}
1574 	return (NULL);
1575 }
1576 
1577 /*
1578  * Find an interface address specific to an interface best matching
1579  * a given address.
1580  */
1581 struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr * addr,struct ifnet * ifp)1582 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
1583 {
1584 	struct ifaddr *ifa;
1585 	const char *cp, *cp2, *cp3;
1586 	char *cplim;
1587 	struct ifaddr *ifa_maybe = NULL;
1588 	u_int af = addr->sa_family;
1589 
1590 	if (af >= AF_MAX)
1591 		return (NULL);
1592 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1593 		if (ifa->ifa_addr->sa_family != af)
1594 			continue;
1595 		if (ifa_maybe == NULL)
1596 			ifa_maybe = ifa;
1597 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1598 			if (equal(addr, ifa->ifa_addr) ||
1599 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1600 				return (ifa);
1601 			continue;
1602 		}
1603 		cp = addr->sa_data;
1604 		cp2 = ifa->ifa_addr->sa_data;
1605 		cp3 = ifa->ifa_netmask->sa_data;
1606 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1607 		for (; cp3 < cplim; cp3++)
1608 			if ((*cp++ ^ *cp2++) & *cp3)
1609 				break;
1610 		if (cp3 == cplim)
1611 			return (ifa);
1612 	}
1613 	return (ifa_maybe);
1614 }
1615 
/* No-op rtrequest handler for interfaces that need no special action. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1620 
1621 /*
1622  * Default action when installing a local route on a point-to-point
1623  * interface.
1624  */
1625 void
p2p_rtrequest(struct ifnet * ifp,int req,struct rtentry * rt)1626 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1627 {
1628 	struct ifnet *lo0ifp;
1629 	struct ifaddr *ifa, *lo0ifa;
1630 
1631 	switch (req) {
1632 	case RTM_ADD:
1633 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1634 			break;
1635 
1636 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1637 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1638 			    rt_key(rt)->sa_len) == 0)
1639 				break;
1640 		}
1641 
1642 		if (ifa == NULL)
1643 			break;
1644 
1645 		KASSERT(ifa == rt->rt_ifa);
1646 
1647 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1648 		KASSERT(lo0ifp != NULL);
1649 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1650 			if (lo0ifa->ifa_addr->sa_family ==
1651 			    ifa->ifa_addr->sa_family)
1652 				break;
1653 		}
1654 		if_put(lo0ifp);
1655 
1656 		if (lo0ifa == NULL)
1657 			break;
1658 
1659 		rt->rt_flags &= ~RTF_LLINFO;
1660 		break;
1661 	case RTM_DELETE:
1662 	case RTM_RESOLVE:
1663 	default:
1664 		break;
1665 	}
1666 }
1667 
/*
 * BPF tap for point-to-point interfaces: prepend the packet's address
 * family before handing it to BPF.  Returns 0 when BPF is not compiled
 * in.
 */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	return (0);
#endif
}
1677 
/*
 * Input handler for point-to-point interfaces: dispatch the packet to
 * the protocol input routine selected by its ph_family tag.  Packets
 * of unsupported families are dropped.
 */
void
p2p_input(struct ifnet *ifp, struct mbuf *m)
{
	void (*input)(struct ifnet *, struct mbuf *);

	switch (m->m_pkthdr.ph_family) {
	case AF_INET:
		input = ipv4_input;
		break;
#ifdef INET6
	case AF_INET6:
		input = ipv6_input;
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		input = mpls_input;
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	(*input)(ifp, m);
}
1704 
1705 /*
1706  * Bring down all interfaces
1707  */
1708 void
if_downall(void)1709 if_downall(void)
1710 {
1711 	struct ifreq ifrq;	/* XXX only partly built */
1712 	struct ifnet *ifp;
1713 
1714 	NET_LOCK();
1715 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1716 		if ((ifp->if_flags & IFF_UP) == 0)
1717 			continue;
1718 		if_down(ifp);
1719 		ifrq.ifr_flags = ifp->if_flags;
1720 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1721 	}
1722 	NET_UNLOCK();
1723 }
1724 
1725 /*
1726  * Mark an interface down and notify protocols of
1727  * the transition.
1728  */
1729 void
if_down(struct ifnet * ifp)1730 if_down(struct ifnet *ifp)
1731 {
1732 	NET_ASSERT_LOCKED();
1733 
1734 	ifp->if_flags &= ~IFF_UP;
1735 	getmicrotime(&ifp->if_lastchange);
1736 	ifq_purge(&ifp->if_snd);
1737 
1738 	if_linkstate(ifp);
1739 }
1740 
1741 /*
1742  * Mark an interface up and notify protocols of
1743  * the transition.
1744  */
1745 void
if_up(struct ifnet * ifp)1746 if_up(struct ifnet *ifp)
1747 {
1748 	NET_ASSERT_LOCKED();
1749 
1750 	ifp->if_flags |= IFF_UP;
1751 	getmicrotime(&ifp->if_lastchange);
1752 
1753 #ifdef INET6
1754 	/* Userland expects the kernel to set ::1 on default lo(4). */
1755 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1756 		in6_ifattach(ifp);
1757 #endif
1758 
1759 	if_linkstate(ifp);
1760 }
1761 
1762 /*
1763  * Notify userland, the routing table and hooks owner of
1764  * a link-state transition.
1765  */
1766 void
if_linkstate_task(void * xifidx)1767 if_linkstate_task(void *xifidx)
1768 {
1769 	unsigned int ifidx = (unsigned long)xifidx;
1770 	struct ifnet *ifp;
1771 
1772 	NET_LOCK();
1773 	KERNEL_LOCK();
1774 
1775 	ifp = if_get(ifidx);
1776 	if (ifp != NULL)
1777 		if_linkstate(ifp);
1778 	if_put(ifp);
1779 
1780 	KERNEL_UNLOCK();
1781 	NET_UNLOCK();
1782 }
1783 
/*
 * Report a link-state change: update the routing socket and routing
 * table (skipped while panicking), then run the interface's
 * link-state hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	if (panicstr == NULL) {
		rtm_ifchg(ifp);
		rt_if_track(ifp);
	}

	if_hooks_run(&ifp->if_linkstatehooks);
}
1796 
/*
 * Register a task to run on link-state changes.  Inserted at the head,
 * so hooks run in reverse registration order.
 */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1804 
/*
 * Unregister a previously added link-state hook.  The task must still
 * be on the list.
 */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1812 
1813 /*
1814  * Schedule a link state change task.
1815  */
1816 void
if_link_state_change(struct ifnet * ifp)1817 if_link_state_change(struct ifnet *ifp)
1818 {
1819 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1820 }
1821 
1822 /*
1823  * Handle interface watchdog timer routine.  Called
1824  * from softclock, we decrement timer (if set) and
1825  * call the appropriate interface routine on expiration.
1826  */
1827 void
if_slowtimo(void * arg)1828 if_slowtimo(void *arg)
1829 {
1830 	struct ifnet *ifp = arg;
1831 	int s = splnet();
1832 
1833 	if (ifp->if_watchdog) {
1834 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1835 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1836 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1837 	}
1838 	splx(s);
1839 }
1840 
/*
 * Task wrapper that invokes the driver's watchdog routine with the
 * kernel lock held.  The interface index is passed through the task
 * argument pointer; if the interface is already gone this is a no-op.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1861 
1862 /*
1863  * Map interface name to interface structure pointer.
1864  */
1865 struct ifnet *
if_unit(const char * name)1866 if_unit(const char *name)
1867 {
1868 	struct ifnet *ifp;
1869 
1870 	KERNEL_ASSERT_LOCKED();
1871 
1872 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1873 		if (strcmp(ifp->if_xname, name) == 0) {
1874 			if_ref(ifp);
1875 			return (ifp);
1876 		}
1877 	}
1878 
1879 	return (NULL);
1880 }
1881 
1882 /*
1883  * Map interface index to interface structure pointer.
1884  */
1885 struct ifnet *
if_get(unsigned int index)1886 if_get(unsigned int index)
1887 {
1888 	struct ifnet **if_map;
1889 	struct ifnet *ifp = NULL;
1890 
1891 	if (index == 0)
1892 		return (NULL);
1893 
1894 	smr_read_enter();
1895 	if_map = SMR_PTR_GET(&if_idxmap.map);
1896 	if (index < if_idxmap_limit(if_map)) {
1897 		ifp = SMR_PTR_GET(&if_map[index]);
1898 		if (ifp != NULL) {
1899 			KASSERT(ifp->if_index == index);
1900 			if_ref(ifp);
1901 		}
1902 	}
1903 	smr_read_leave();
1904 
1905 	return (ifp);
1906 }
1907 
/* Take an additional reference on ifp; returns ifp for convenience. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1915 
/*
 * Release a reference taken by if_get()/if_unit()/if_ref().
 * NULL is accepted and ignored.  Wakes up a thread sleeping in
 * refcnt_finalize() (if_remove) when the last reference goes away.
 */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1924 
/*
 * Set the interface's link-level (MAC) address.  Writes both the
 * arpcom copy and the sockaddr_dl attached to the interface.
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom
 * (Ethernet-style interface) — callers appear restricted to such
 * types via the ifioctl() if_type switch; confirm before reusing.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1936 
/*
 * Create routing domain ``rdomain'' together with its loopback
 * interface lo<rdomain>, and wire them together.  Fails if the
 * routing table cannot be added, already carries routes, or the
 * loopback interface cannot be found after creation.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when ifp itself is that loopback interface. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Make lo<rdomain> the loopback of the new domain. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1966 
/*
 * Move an interface into routing domain ``rdomain''.  The rdomain
 * must exist and be a real routing domain (not just a table).  The
 * loopback interface anchoring a domain may not be moved out of it
 * (EPERM).  Changing domains tears down all addresses and routes of
 * the interface; an interface that was up is cycled down and back up.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;	/* ENOTTY just means the driver has no handler */

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
2030 
2031 /*
2032  * Interface ioctls.
2033  */
2034 int
ifioctl(struct socket * so,u_long cmd,caddr_t data,struct proc * p)2035 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
2036 {
2037 	struct ifnet *ifp;
2038 	struct ifreq *ifr = (struct ifreq *)data;
2039 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
2040 	struct if_afreq *ifar = (struct if_afreq *)data;
2041 	char ifdescrbuf[IFDESCRSIZE];
2042 	char ifrtlabelbuf[RTLABEL_LEN];
2043 	int s, error = 0, oif_xflags;
2044 	size_t bytesdone;
2045 	unsigned short oif_flags;
2046 
2047 	switch (cmd) {
2048 	case SIOCIFCREATE:
2049 		if ((error = suser(p)) != 0)
2050 			return (error);
2051 		KERNEL_LOCK();
2052 		error = if_clone_create(ifr->ifr_name, 0);
2053 		KERNEL_UNLOCK();
2054 		return (error);
2055 	case SIOCIFDESTROY:
2056 		if ((error = suser(p)) != 0)
2057 			return (error);
2058 		KERNEL_LOCK();
2059 		error = if_clone_destroy(ifr->ifr_name);
2060 		KERNEL_UNLOCK();
2061 		return (error);
2062 	case SIOCSIFGATTR:
2063 		if ((error = suser(p)) != 0)
2064 			return (error);
2065 		KERNEL_LOCK();
2066 		NET_LOCK();
2067 		error = if_setgroupattribs(data);
2068 		NET_UNLOCK();
2069 		KERNEL_UNLOCK();
2070 		return (error);
2071 	case SIOCGIFCONF:
2072 	case SIOCIFGCLONERS:
2073 	case SIOCGIFGMEMB:
2074 	case SIOCGIFGATTR:
2075 	case SIOCGIFGLIST:
2076 	case SIOCGIFFLAGS:
2077 	case SIOCGIFXFLAGS:
2078 	case SIOCGIFMETRIC:
2079 	case SIOCGIFMTU:
2080 	case SIOCGIFHARDMTU:
2081 	case SIOCGIFDATA:
2082 	case SIOCGIFDESCR:
2083 	case SIOCGIFRTLABEL:
2084 	case SIOCGIFPRIORITY:
2085 	case SIOCGIFRDOMAIN:
2086 	case SIOCGIFGROUP:
2087 	case SIOCGIFLLPRIO:
2088 		error = ifioctl_get(cmd, data);
2089 		return (error);
2090 	}
2091 
2092 	KERNEL_LOCK();
2093 
2094 	ifp = if_unit(ifr->ifr_name);
2095 	if (ifp == NULL) {
2096 		KERNEL_UNLOCK();
2097 		return (ENXIO);
2098 	}
2099 	oif_flags = ifp->if_flags;
2100 	oif_xflags = ifp->if_xflags;
2101 
2102 	switch (cmd) {
2103 	case SIOCIFAFATTACH:
2104 	case SIOCIFAFDETACH:
2105 		if ((error = suser(p)) != 0)
2106 			break;
2107 		NET_LOCK();
2108 		switch (ifar->ifar_af) {
2109 		case AF_INET:
2110 			/* attach is a noop for AF_INET */
2111 			if (cmd == SIOCIFAFDETACH)
2112 				in_ifdetach(ifp);
2113 			break;
2114 #ifdef INET6
2115 		case AF_INET6:
2116 			if (cmd == SIOCIFAFATTACH)
2117 				error = in6_ifattach(ifp);
2118 			else
2119 				in6_ifdetach(ifp);
2120 			break;
2121 #endif /* INET6 */
2122 		default:
2123 			error = EAFNOSUPPORT;
2124 		}
2125 		NET_UNLOCK();
2126 		break;
2127 
2128 	case SIOCSIFXFLAGS:
2129 		if ((error = suser(p)) != 0)
2130 			break;
2131 
2132 		NET_LOCK();
2133 #ifdef INET6
2134 		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
2135 		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
2136 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
2137 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
2138 			error = in6_ifattach(ifp);
2139 			if (error != 0) {
2140 				NET_UNLOCK();
2141 				break;
2142 			}
2143 		}
2144 
2145 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2146 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2147 			ifp->if_xflags |= IFXF_INET6_NOSOII;
2148 
2149 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2150 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2151 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
2152 
2153 #endif	/* INET6 */
2154 
2155 #ifdef MPLS
2156 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
2157 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
2158 			s = splnet();
2159 			ifp->if_xflags |= IFXF_MPLS;
2160 			ifp->if_ll_output = ifp->if_output;
2161 			ifp->if_output = mpls_output;
2162 			splx(s);
2163 		}
2164 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
2165 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
2166 			s = splnet();
2167 			ifp->if_xflags &= ~IFXF_MPLS;
2168 			ifp->if_output = ifp->if_ll_output;
2169 			ifp->if_ll_output = NULL;
2170 			splx(s);
2171 		}
2172 #endif	/* MPLS */
2173 
2174 #ifndef SMALL_KERNEL
2175 		if (ifp->if_capabilities & IFCAP_WOL) {
2176 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
2177 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
2178 				s = splnet();
2179 				ifp->if_xflags |= IFXF_WOL;
2180 				error = ifp->if_wol(ifp, 1);
2181 				splx(s);
2182 			}
2183 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
2184 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2185 				s = splnet();
2186 				ifp->if_xflags &= ~IFXF_WOL;
2187 				error = ifp->if_wol(ifp, 0);
2188 				splx(s);
2189 			}
2190 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2191 			ifr->ifr_flags &= ~IFXF_WOL;
2192 			error = ENOTSUP;
2193 		}
2194 #endif
2195 		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
2196 		    ISSET(ifp->if_xflags, IFXF_LRO))
2197 			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));
2198 
2199 		if (error == 0)
2200 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2201 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2202 
2203 		if (!ISSET(ifp->if_flags, IFF_UP) &&
2204 		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
2205 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
2206 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
2207 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
2208 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
2209 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
2210 			ifr->ifr_flags = ifp->if_flags | IFF_UP;
2211 			goto forceup;
2212 		}
2213 
2214 		NET_UNLOCK();
2215 		break;
2216 
2217 	case SIOCSIFFLAGS:
2218 		if ((error = suser(p)) != 0)
2219 			break;
2220 
2221 		NET_LOCK();
2222 forceup:
2223 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2224 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
2225 		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
2226 		if (error != 0) {
2227 			ifp->if_flags = oif_flags;
2228 			if (cmd == SIOCSIFXFLAGS)
2229 				ifp->if_xflags = oif_xflags;
2230 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
2231 			s = splnet();
2232 			if (ISSET(ifp->if_flags, IFF_UP))
2233 				if_up(ifp);
2234 			else
2235 				if_down(ifp);
2236 			splx(s);
2237 		}
2238 		NET_UNLOCK();
2239 		break;
2240 
2241 	case SIOCSIFMETRIC:
2242 		if ((error = suser(p)) != 0)
2243 			break;
2244 		NET_LOCK();
2245 		ifp->if_metric = ifr->ifr_metric;
2246 		NET_UNLOCK();
2247 		break;
2248 
2249 	case SIOCSIFMTU:
2250 		if ((error = suser(p)) != 0)
2251 			break;
2252 		NET_LOCK();
2253 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2254 		NET_UNLOCK();
2255 		if (error == 0)
2256 			rtm_ifchg(ifp);
2257 		break;
2258 
2259 	case SIOCSIFDESCR:
2260 		if ((error = suser(p)) != 0)
2261 			break;
2262 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2263 		    IFDESCRSIZE, &bytesdone);
2264 		if (error == 0) {
2265 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2266 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2267 		}
2268 		break;
2269 
2270 	case SIOCSIFRTLABEL:
2271 		if ((error = suser(p)) != 0)
2272 			break;
2273 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2274 		    RTLABEL_LEN, &bytesdone);
2275 		if (error == 0) {
2276 			rtlabel_unref(ifp->if_rtlabelid);
2277 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2278 		}
2279 		break;
2280 
2281 	case SIOCSIFPRIORITY:
2282 		if ((error = suser(p)) != 0)
2283 			break;
2284 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2285 			error = EINVAL;
2286 			break;
2287 		}
2288 		ifp->if_priority = ifr->ifr_metric;
2289 		break;
2290 
2291 	case SIOCSIFRDOMAIN:
2292 		if ((error = suser(p)) != 0)
2293 			break;
2294 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2295 		if (!error || error == EEXIST) {
2296 			NET_LOCK();
2297 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2298 			NET_UNLOCK();
2299 		}
2300 		break;
2301 
2302 	case SIOCAIFGROUP:
2303 		if ((error = suser(p)))
2304 			break;
2305 		NET_LOCK();
2306 		error = if_addgroup(ifp, ifgr->ifgr_group);
2307 		if (error == 0) {
2308 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2309 			if (error == ENOTTY)
2310 				error = 0;
2311 		}
2312 		NET_UNLOCK();
2313 		break;
2314 
2315 	case SIOCDIFGROUP:
2316 		if ((error = suser(p)))
2317 			break;
2318 		NET_LOCK();
2319 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2320 		if (error == ENOTTY)
2321 			error = 0;
2322 		if (error == 0)
2323 			error = if_delgroup(ifp, ifgr->ifgr_group);
2324 		NET_UNLOCK();
2325 		break;
2326 
2327 	case SIOCSIFLLADDR:
2328 		if ((error = suser(p)))
2329 			break;
2330 		if ((ifp->if_sadl == NULL) ||
2331 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2332 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2333 			error = EINVAL;
2334 			break;
2335 		}
2336 		NET_LOCK();
2337 		switch (ifp->if_type) {
2338 		case IFT_ETHER:
2339 		case IFT_CARP:
2340 		case IFT_XETHER:
2341 		case IFT_ISO88025:
2342 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2343 			if (error == ENOTTY)
2344 				error = 0;
2345 			if (error == 0)
2346 				error = if_setlladdr(ifp,
2347 				    ifr->ifr_addr.sa_data);
2348 			break;
2349 		default:
2350 			error = ENODEV;
2351 		}
2352 
2353 		if (error == 0)
2354 			ifnewlladdr(ifp);
2355 		NET_UNLOCK();
2356 		if (error == 0)
2357 			rtm_ifchg(ifp);
2358 		break;
2359 
2360 	case SIOCSIFLLPRIO:
2361 		if ((error = suser(p)))
2362 			break;
2363 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2364 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2365 			error = EINVAL;
2366 			break;
2367 		}
2368 		NET_LOCK();
2369 		ifp->if_llprio = ifr->ifr_llprio;
2370 		NET_UNLOCK();
2371 		break;
2372 
2373 	case SIOCGIFSFFPAGE:
2374 		error = suser(p);
2375 		if (error != 0)
2376 			break;
2377 
2378 		error = if_sffpage_check(data);
2379 		if (error != 0)
2380 			break;
2381 
2382 		/* don't take NET_LOCK because i2c reads take a long time */
2383 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2384 		break;
2385 
2386 	case SIOCSIFMEDIA:
2387 		if ((error = suser(p)) != 0)
2388 			break;
2389 		/* FALLTHROUGH */
2390 	case SIOCGIFMEDIA:
2391 		/* net lock is not needed */
2392 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2393 		break;
2394 
2395 	case SIOCSETKALIVE:
2396 	case SIOCDIFPHYADDR:
2397 	case SIOCSLIFPHYADDR:
2398 	case SIOCSLIFPHYRTABLE:
2399 	case SIOCSLIFPHYTTL:
2400 	case SIOCSLIFPHYDF:
2401 	case SIOCSLIFPHYECN:
2402 	case SIOCADDMULTI:
2403 	case SIOCDELMULTI:
2404 	case SIOCSVNETID:
2405 	case SIOCDVNETID:
2406 	case SIOCSVNETFLOWID:
2407 	case SIOCSTXHPRIO:
2408 	case SIOCSRXHPRIO:
2409 	case SIOCSIFPAIR:
2410 	case SIOCSIFPARENT:
2411 	case SIOCDIFPARENT:
2412 	case SIOCSETMPWCFG:
2413 	case SIOCSETLABEL:
2414 	case SIOCDELLABEL:
2415 	case SIOCSPWE3CTRLWORD:
2416 	case SIOCSPWE3FAT:
2417 	case SIOCSPWE3NEIGHBOR:
2418 	case SIOCDPWE3NEIGHBOR:
2419 #if NBRIDGE > 0
2420 	case SIOCBRDGADD:
2421 	case SIOCBRDGDEL:
2422 	case SIOCBRDGSIFFLGS:
2423 	case SIOCBRDGSCACHE:
2424 	case SIOCBRDGADDS:
2425 	case SIOCBRDGDELS:
2426 	case SIOCBRDGSADDR:
2427 	case SIOCBRDGSTO:
2428 	case SIOCBRDGDADDR:
2429 	case SIOCBRDGFLUSH:
2430 	case SIOCBRDGADDL:
2431 	case SIOCBRDGSIFPROT:
2432 	case SIOCBRDGARL:
2433 	case SIOCBRDGFRL:
2434 	case SIOCBRDGSPRI:
2435 	case SIOCBRDGSHT:
2436 	case SIOCBRDGSFD:
2437 	case SIOCBRDGSMA:
2438 	case SIOCBRDGSIFPRIO:
2439 	case SIOCBRDGSIFCOST:
2440 	case SIOCBRDGSTXHC:
2441 	case SIOCBRDGSPROTO:
2442 #endif
2443 		if ((error = suser(p)) != 0)
2444 			break;
2445 		/* FALLTHROUGH */
2446 	default:
2447 		error = pru_control(so, cmd, data, ifp);
2448 		if (error != EOPNOTSUPP)
2449 			break;
2450 		switch (cmd) {
2451 		case SIOCAIFADDR:
2452 		case SIOCDIFADDR:
2453 		case SIOCSIFADDR:
2454 		case SIOCSIFNETMASK:
2455 		case SIOCSIFDSTADDR:
2456 		case SIOCSIFBRDADDR:
2457 #ifdef INET6
2458 		case SIOCAIFADDR_IN6:
2459 		case SIOCDIFADDR_IN6:
2460 #endif
2461 			error = suser(p);
2462 			break;
2463 		default:
2464 			error = 0;
2465 			break;
2466 		}
2467 		if (error)
2468 			break;
2469 		NET_LOCK();
2470 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2471 		NET_UNLOCK();
2472 		break;
2473 	}
2474 
2475 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
2476 		/* if_up() and if_down() already sent an update, skip here */
2477 		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
2478 			rtm_ifchg(ifp);
2479 	}
2480 
2481 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2482 		getmicrotime(&ifp->if_lastchange);
2483 
2484 	KERNEL_UNLOCK();
2485 
2486 	if_put(ifp);
2487 
2488 	return (error);
2489 }
2490 
/*
 * Handle the read-only subset of interface ioctls.  Global queries
 * (interface config, cloner list, group lists) are served before any
 * interface lookup; all other commands resolve the interface by name
 * and copy values out under the shared net lock.  Returns 0 or errno.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];	/* kernel staging for copyoutstr */
	char ifrtlabelbuf[RTLABEL_LEN];	/* kernel staging for copyoutstr */
	int error = 0;
	size_t bytesdone;

	/* Requests that do not refer to a single interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		/* NOTE(review): only case here without the net lock */
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_LOCK_SHARED();
		error = if_getgroupmembers(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		NET_LOCK_SHARED();
		error = if_getgrouplist(data);
		NET_UNLOCK_SHARED();
		return (error);
	}

	KERNEL_LOCK();

	/* Per-interface requests: the named interface must exist. */
	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}

	NET_LOCK_SHARED();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is derived from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* kernel-internal bits are not exported to userland */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		/* Snapshot counters and queue stats, then copy out. */
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		/* Only meaningful if a route label is actually set. */
		if (ifp->if_rtlabelid && rtlabel_id2name(ifp->if_rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* callers must only dispatch known GET commands here */
		panic("invalid ioctl %lu", cmd);
	}

	NET_UNLOCK_SHARED();

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
2610 
2611 static int
if_sffpage_check(const caddr_t data)2612 if_sffpage_check(const caddr_t data)
2613 {
2614 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2615 
2616 	switch (sff->sff_addr) {
2617 	case IFSFF_ADDR_EEPROM:
2618 	case IFSFF_ADDR_DDM:
2619 		break;
2620 	default:
2621 		return (EINVAL);
2622 	}
2623 
2624 	return (0);
2625 }
2626 
2627 int
if_txhprio_l2_check(int hdrprio)2628 if_txhprio_l2_check(int hdrprio)
2629 {
2630 	switch (hdrprio) {
2631 	case IF_HDRPRIO_PACKET:
2632 		return (0);
2633 	default:
2634 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2635 			return (0);
2636 		break;
2637 	}
2638 
2639 	return (EINVAL);
2640 }
2641 
2642 int
if_txhprio_l3_check(int hdrprio)2643 if_txhprio_l3_check(int hdrprio)
2644 {
2645 	switch (hdrprio) {
2646 	case IF_HDRPRIO_PACKET:
2647 	case IF_HDRPRIO_PAYLOAD:
2648 		return (0);
2649 	default:
2650 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2651 			return (0);
2652 		break;
2653 	}
2654 
2655 	return (EINVAL);
2656 }
2657 
2658 int
if_rxhprio_l2_check(int hdrprio)2659 if_rxhprio_l2_check(int hdrprio)
2660 {
2661 	switch (hdrprio) {
2662 	case IF_HDRPRIO_PACKET:
2663 	case IF_HDRPRIO_OUTER:
2664 		return (0);
2665 	default:
2666 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2667 			return (0);
2668 		break;
2669 	}
2670 
2671 	return (EINVAL);
2672 }
2673 
2674 int
if_rxhprio_l3_check(int hdrprio)2675 if_rxhprio_l3_check(int hdrprio)
2676 {
2677 	switch (hdrprio) {
2678 	case IF_HDRPRIO_PACKET:
2679 	case IF_HDRPRIO_PAYLOAD:
2680 	case IF_HDRPRIO_OUTER:
2681 		return (0);
2682 	default:
2683 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2684 			return (0);
2685 		break;
2686 	}
2687 
2688 	return (EINVAL);
2689 }
2690 
/*
 * Return the interface configuration of the system (SIOCGIFCONF).
 * The resulting list may be used by later ioctls to get other
 * information.  Two modes: with ifc_len == 0 this is a size probe;
 * otherwise entries are copied out until the user buffer is full.
 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
			struct sockaddr *sa;

			/* An address-less interface still takes one ifreq. */
			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/*
					 * Oversized sockaddrs spill past the
					 * embedded ifr_addr; account for the
					 * extra bytes.
					 */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	/* Copy-out mode: fill the user buffer entry by entry. */
	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* No addresses: emit the name with a zeroed addr. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					/* Fits in the embedded ifr_addr. */
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * Oversized sockaddr: copy the name,
					 * then the sockaddr separately, and
					 * advance ifrp past the overflow so
					 * the next entry starts after it.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* Report how many bytes were actually used. */
	ifc->ifc_len -= space;
	return (error);
}
2772 
/* Attach a per-CPU counter set to the interface; may be done only once. */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2780 
/* Release the counter set attached by if_counters_alloc(). */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;	/* guard against later double free */
}
2789 
2790 void
if_getdata(struct ifnet * ifp,struct if_data * data)2791 if_getdata(struct ifnet *ifp, struct if_data *data)
2792 {
2793 	unsigned int i;
2794 
2795 	*data = ifp->if_data;
2796 
2797 	if (ifp->if_counters != NULL) {
2798 		uint64_t counters[ifc_ncounters];
2799 
2800 		counters_read(ifp->if_counters, counters, nitems(counters),
2801 		    NULL);
2802 
2803 		data->ifi_ipackets += counters[ifc_ipackets];
2804 		data->ifi_ierrors += counters[ifc_ierrors];
2805 		data->ifi_opackets += counters[ifc_opackets];
2806 		data->ifi_oerrors += counters[ifc_oerrors];
2807 		data->ifi_collisions += counters[ifc_collisions];
2808 		data->ifi_ibytes += counters[ifc_ibytes];
2809 		data->ifi_obytes += counters[ifc_obytes];
2810 		data->ifi_imcasts += counters[ifc_imcasts];
2811 		data->ifi_omcasts += counters[ifc_omcasts];
2812 		data->ifi_iqdrops += counters[ifc_iqdrops];
2813 		data->ifi_oqdrops += counters[ifc_oqdrops];
2814 		data->ifi_noproto += counters[ifc_noproto];
2815 	}
2816 
2817 	for (i = 0; i < ifp->if_nifqs; i++) {
2818 		struct ifqueue *ifq = ifp->if_ifqs[i];
2819 
2820 		ifq_add_data(ifq, data);
2821 	}
2822 
2823 	for (i = 0; i < ifp->if_niqs; i++) {
2824 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2825 
2826 		ifiq_add_data(ifiq, data);
2827 	}
2828 }
2829 
/*
 * Dummy functions installed in the ifnet during detach (in case
 * protocols decide to fiddle with the interface while it detaches).
 */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* Drop anything queued; the interface is going away. */
	ifq_purge(ifq);
}
2839 
2840 int
if_detached_ioctl(struct ifnet * ifp,u_long a,caddr_t b)2841 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2842 {
2843 	return ENODEV;
2844 }
2845 
/*
 * Create an interface group without members.  Returns the new group
 * with a reference count of 1, or NULL on allocation failure.
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_IFGROUP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 1;	/* reference held by the caller */
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	/* let pf know about the group before it becomes visible */
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2868 
/*
 * Add a group to an interface.  Creates the group if it does not
 * exist yet.  Returns 0, EINVAL for a bad name, EEXIST if the
 * interface is already a member, or ENOMEM.
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;
	size_t			 namelen;

	/*
	 * Group names must be non-empty, fit in IFNAMSIZ and may not
	 * end in a digit — presumably so they cannot be confused with
	 * interface unit names like "em0".
	 */
	namelen = strlen(groupname);
	if (namelen == 0 || namelen >= IFNAMSIZ ||
	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
		return (EINVAL);

	/* Refuse duplicate membership. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* Allocate both link structures before touching any list. */
	if ((ifgl = malloc(sizeof(*ifgl), M_IFGROUP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_IFGROUP, M_NOWAIT)) == NULL) {
		free(ifgl, M_IFGROUP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* Find an existing group of that name, or create one. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL) {
		ifg = if_creategroup(groupname);
		if (ifg == NULL) {
			free(ifgl, M_IFGROUP, sizeof(*ifgl));
			free(ifgm, M_IFGROUP, sizeof(*ifgm));
			return (ENOMEM);
		}
	} else
		ifg->ifg_refcnt++;	/* new member holds a reference */
	KASSERT(ifg->ifg_refcnt != 0);

	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname);
#endif

	return (0);
}
2924 
/*
 * Remove a group from an interface.  Drops the interface's reference
 * to the group and destroys the group when the last reference goes.
 * Returns 0 or ENOENT if the interface is not a member.
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Unlink this interface's member entry from the group. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_IFGROUP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_delmember(groupname);
#endif

	/* Last reference gone: destroy the group itself. */
	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_IFGROUP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_IFGROUP, sizeof(*ifgl));

	return (0);
}
2968 
2969 /*
2970  * Stores all groups from an interface in memory pointed
2971  * to by data
2972  */
2973 int
if_getgroup(caddr_t data,struct ifnet * ifp)2974 if_getgroup(caddr_t data, struct ifnet *ifp)
2975 {
2976 	int			 len, error;
2977 	struct ifg_list		*ifgl;
2978 	struct ifg_req		 ifgrq, *ifgp;
2979 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2980 
2981 	if (ifgr->ifgr_len == 0) {
2982 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2983 			ifgr->ifgr_len += sizeof(struct ifg_req);
2984 		return (0);
2985 	}
2986 
2987 	len = ifgr->ifgr_len;
2988 	ifgp = ifgr->ifgr_groups;
2989 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2990 		if (len < sizeof(ifgrq))
2991 			return (EINVAL);
2992 		bzero(&ifgrq, sizeof ifgrq);
2993 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2994 		    sizeof(ifgrq.ifgrq_group));
2995 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2996 		    sizeof(struct ifg_req))))
2997 			return (error);
2998 		len -= sizeof(ifgrq);
2999 		ifgp++;
3000 	}
3001 
3002 	return (0);
3003 }
3004 
/*
 * Copy all members of the named group out to the memory pointed to
 * by data.  A zero ifgr_len is a size probe.  Returns 0, ENOENT if
 * the group does not exist, EINVAL if the buffer is too small, or
 * a copyout error.
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Size probe: report the required length only. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
3046 
3047 int
if_getgroupattribs(caddr_t data)3048 if_getgroupattribs(caddr_t data)
3049 {
3050 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
3051 	struct ifg_group	*ifg;
3052 
3053 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
3054 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
3055 			break;
3056 	if (ifg == NULL)
3057 		return (ENOENT);
3058 
3059 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
3060 
3061 	return (0);
3062 }
3063 
/*
 * Apply a (signed) carp demotion delta to the named group and notify
 * every member interface of the change.  Returns 0, ENOENT if the
 * group does not exist, or EINVAL if the counter would leave [0, 255].
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* The delta may be negative; keep the sum within [0, 0xff]. */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Give each member a chance to react to the new value. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
3090 
/*
 * Copy the list of all existing groups out to the memory pointed to
 * by data.  A zero ifgr_len is a size probe.
 */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Size probe: report the required length only. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
3125 
/*
 * Called on route changes: when a default route (all-zero destination
 * and mask) is affected, rebuild the "egress" interface group.
 */
void
if_group_routechange(const struct sockaddr *dst, const struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin_const(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin_const(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6_const(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6_const(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
3147 
/*
 * Rebuild the "egress" group from scratch: empty it, then add every
 * interface that carries a default route (IPv4 and, if configured,
 * IPv6) in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Drop all current members, if the group exists at all. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* IPv4: walk all default routes in table 0. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* IPv6: same walk with the unspecified address. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3197 
/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* Remember the old state so a failed ioctl can be rolled back. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the 0 -> 1 transition changes the flag. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the last "off" request clears the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* A down interface picks the flag up when it is brought up. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* Driver refused: restore both flag and refcount. */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
3238 
/*
 * Set/clear the LRO flag and restart the interface if needed.
 * The setting is first propagated recursively to the parent device,
 * if the driver reports one.  Returns 0 or an errno value.
 */
int
ifsetlro(struct ifnet *ifp, int on)
{
	struct ifreq ifrq;
	int error = 0;
	int s = splnet();
	struct if_parent parent;

	/* Propagate to our parent interface first, best effort. */
	memset(&parent, 0, sizeof(parent));
	if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) {
		struct ifnet *ifp0 = if_unit(parent.ifp_parent);

		if (ifp0 != NULL) {
			/* errors from the parent are deliberately ignored */
			ifsetlro(ifp0, on);
			if_put(ifp0);
		}
	}

	if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
		error = ENOTSUP;
		goto out;
	}

	NET_ASSERT_LOCKED();	/* for ioctl */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
		/* LRO cannot be enabled while part of a bridge. */
		if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) {
			error = EBUSY;
			goto out;
		}
		SET(ifp->if_xflags, IFXF_LRO);
	} else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
		CLR(ifp->if_xflags, IFXF_LRO);
	else
		goto out;	/* no change requested */

	/* restart interface */
	if (ISSET(ifp->if_flags, IFF_UP)) {
		/* go down for a moment... */
		CLR(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

		/* ... and up again */
		SET(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
 out:
	splx(s);

	return error;
}
3294 
/* Append an address to the interface's address list; needs the
 * exclusive net lock since readers traverse the list under it. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3301 
/* Remove an address from the interface's address list; counterpart
 * of ifa_add(), same exclusive net lock requirement. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3308 
/*
 * Overwrite an address's broadcast sockaddr in place.  The stored
 * sockaddr is fixed-size, so the new one must have the same sa_len.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3316 
#ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every INET/INET6 address of every interface. */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			/* other address families are silently skipped */
			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3348 
/*
 * Tell the driver about a new link-layer address by cycling the
 * interface through SIOCSIFFLAGS with IFF_UP set, and regenerate the
 * IPv6 link-local address derived from the old lladdr.  The original
 * up/down state is restored before returning.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Force an up transition so the driver reprograms itself. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!i_am_router) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3396 
/* Register a task to run when the interface's addresses change;
 * the hook list is protected by if_hooks_mtx. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3404 
/* Unregister an address-change task added by if_addrhook_add(). */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3412 
/* Run all registered address-change hooks for this interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3418 
/*
 * Initialise a receive-ring accounting structure with low and high
 * watermarks.  The current watermark starts at the low mark and is
 * adapted over time by if_rxr_adjust_cwm()/if_rxr_livelocked().
 */
void
if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
{
	extern int ticks;

	memset(rxr, 0, sizeof(*rxr));

	rxr->rxr_adjusted = ticks;	/* timestamp of last adjustment */
	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
	rxr->rxr_hwm = hwm;
}
3430 
/*
 * Possibly grow the current watermark.  Growth happens only when the
 * ring is running low on live buffers (below the low watermark) and
 * the current watermark has not yet reached the high watermark.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	/* no pressure: leave both the watermark and timestamp alone */
	if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	rxr->rxr_adjusted = ticks;
}
3443 
/*
 * Called when the system detects a receive livelock: back off by
 * shrinking the current watermark, at most once per tick, never
 * below the low watermark.
 */
void
if_rxr_livelocked(struct if_rxring *rxr)
{
	extern int ticks;

	if (ticks - rxr->rxr_adjusted >= 1) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;

		rxr->rxr_adjusted = ticks;
	}
}
3456 
/*
 * Hand out up to max new receive buffer slots, bounded by the current
 * watermark.  Returns the number granted (possibly 0) and accounts
 * them as alive.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* already at (or over) the watermark: grant nothing */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3476 
/*
 * Copy rx ring information out to userland for SIOCGIFRXR-style
 * ioctls.  "t" is the number of entries the kernel has in "e"; the
 * userland struct tells us how much room it provided.  Returns 0 or
 * an errno from copyin/copyout.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* copy out no more entries than userland made room for */
	n = min(t, kifri.ifri_total);
	/* report the real total so userland can detect truncation */
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3499 
3500 int
if_rxr_ioctl(struct if_rxrinfo * ifri,const char * name,u_int size,struct if_rxring * rxr)3501 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3502     struct if_rxring *rxr)
3503 {
3504 	struct if_rxring_info ifr;
3505 
3506 	memset(&ifr, 0, sizeof(ifr));
3507 
3508 	if (name != NULL)
3509 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3510 
3511 	ifr.ifr_size = size;
3512 	ifr.ifr_info = *rxr;
3513 
3514 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3515 }
3516 
3517 /*
3518  * Network stack input queues.
3519  */
3520 
/*
 * Initialize a network stack input queue: a bounded mbuf queue
 * protected at IPL_NET, plus the netisr to schedule when packets
 * are enqueued.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3527 
3528 int
niq_enqueue(struct niqueue * niq,struct mbuf * m)3529 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3530 {
3531 	int rv;
3532 
3533 	rv = mq_enqueue(&niq->ni_q, m);
3534 	if (rv == 0)
3535 		schednetisr(niq->ni_isr);
3536 	else
3537 		if_congestion();
3538 
3539 	return (rv);
3540 }
3541 
3542 int
niq_enlist(struct niqueue * niq,struct mbuf_list * ml)3543 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3544 {
3545 	int rv;
3546 
3547 	rv = mq_enlist(&niq->ni_q, ml);
3548 	if (rv == 0)
3549 		schednetisr(niq->ni_isr);
3550 	else
3551 		if_congestion();
3552 
3553 	return (rv);
3554 }
3555 
/*
 * Panic on an address family the caller has no case for.  Marked
 * __dead: this function never returns.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3561 
3562 struct taskq *
net_tq(unsigned int ifindex)3563 net_tq(unsigned int ifindex)
3564 {
3565 	struct softnet *sn;
3566 	static int nettaskqs;
3567 
3568 	if (nettaskqs == 0)
3569 		nettaskqs = min(NET_TASKQ, ncpus);
3570 
3571 	sn = &softnets[ifindex % nettaskqs];
3572 
3573 	return (sn->sn_taskq);
3574 }
3575 
3576 void
net_tq_barriers(const char * wmesg)3577 net_tq_barriers(const char *wmesg)
3578 {
3579 	struct task barriers[NET_TASKQ];
3580 	struct refcnt r = REFCNT_INITIALIZER();
3581 	int i;
3582 
3583 	for (i = 0; i < nitems(barriers); i++) {
3584 		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake, &r);
3585 		refcnt_take(&r);
3586 		task_add(softnets[i].sn_taskq, &barriers[i]);
3587 	}
3588 
3589 	refcnt_finalize(&r, wmesg);
3590 }
3591