xref: /openbsd/sys/net/if.c (revision d618cc99)
1 /*	$OpenBSD: if.c,v 1.718 2024/02/06 00:18:53 bluhm Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 #include <sys/smr.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include "vlan.h"
99 #if NVLAN > 0
100 #include <net/if_vlan_var.h>
101 #endif
102 
103 #include <netinet/in.h>
104 #include <netinet/if_ether.h>
105 #include <netinet/igmp.h>
106 #ifdef MROUTING
107 #include <netinet/ip_mroute.h>
108 #endif
109 #include <netinet/tcp.h>
110 #include <netinet/tcp_timer.h>
111 #include <netinet/tcp_var.h>
112 
113 #ifdef INET6
114 #include <netinet6/in6_var.h>
115 #include <netinet6/in6_ifattach.h>
116 #include <netinet6/nd6.h>
117 #include <netinet/ip6.h>
118 #include <netinet6/ip6_var.h>
119 #endif
120 
121 #ifdef MPLS
122 #include <netmpls/mpls.h>
123 #endif
124 
125 #if NBPFILTER > 0
126 #include <net/bpf.h>
127 #endif
128 
129 #if NBRIDGE > 0
130 #include <net/if_bridge.h>
131 #endif
132 
133 #if NCARP > 0
134 #include <netinet/ip_carp.h>
135 #endif
136 
137 #if NPF > 0
138 #include <net/pfvar.h>
139 #endif
140 
141 #include <sys/device.h>
142 
143 void	if_attachsetup(struct ifnet *);
144 void	if_attach_common(struct ifnet *);
145 void	if_remove(struct ifnet *);
146 int	if_createrdomain(int, struct ifnet *);
147 int	if_setrdomain(struct ifnet *, int);
148 void	if_slowtimo(void *);
149 
150 void	if_detached_qstart(struct ifqueue *);
151 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
152 
153 int	ifioctl_get(u_long, caddr_t);
154 int	ifconf(caddr_t);
155 static int
156 	if_sffpage_check(const caddr_t);
157 
158 int	if_getgroup(caddr_t, struct ifnet *);
159 int	if_getgroupmembers(caddr_t);
160 int	if_getgroupattribs(caddr_t);
161 int	if_setgroupattribs(caddr_t);
162 int	if_getgrouplist(caddr_t);
163 
164 void	if_linkstate(struct ifnet *);
165 void	if_linkstate_task(void *);
166 
167 int	if_clone_list(struct if_clonereq *);
168 struct if_clone	*if_clone_lookup(const char *, int *);
169 
170 int	if_group_egress_build(void);
171 
172 void	if_watchdog_task(void *);
173 
174 void	if_netisr(void *);
175 
176 #ifdef DDB
177 void	ifa_print_all(void);
178 #endif
179 
180 void	if_qstart_compat(struct ifqueue *);
181 
182 /*
183  * interface index map
184  *
185  * the kernel maintains a mapping of interface indexes to struct ifnet
186  * pointers.
187  *
188  * the map is an array of struct ifnet pointers prefixed by an if_map
189  * structure. the if_map structure stores the length of its array.
190  *
191  * as interfaces are attached to the system, the map is grown on demand
192  * up to USHRT_MAX entries.
193  *
194  * interface index 0 is reserved and represents no interface. this
195  * supports the use of the interface index as the scope for IPv6 link
196  * local addresses, where scope 0 means no scope has been specified.
197  * it also supports the use of interface index as the unique identifier
198  * for network interfaces in SNMP applications as per RFC2863. therefore
199  * if_get(0) returns NULL.
200  */
201 
202 struct ifnet *if_ref(struct ifnet *);
203 
204 /*
205  * struct if_idxmap
206  *
207  * infrastructure to manage updates and accesses to the current if_map.
208  *
209  * interface index 0 is special and represents "no interface", so we
210  * use the 0th slot in map to store the length of the array.
211  */
212 
213 struct if_idxmap {
214 	unsigned int		  serial;
215 	unsigned int		  count;
216 	struct ifnet		**map;		/* SMR protected */
217 	struct rwlock		  lock;
218 	unsigned char		 *usedidx;	/* bitmap of indices in use */
219 };
220 
221 struct if_idxmap_dtor {
222 	struct smr_entry	  smr;
223 	struct ifnet		**map;
224 };
225 
226 void	if_idxmap_init(unsigned int);
227 void	if_idxmap_free(void *);
228 void	if_idxmap_alloc(struct ifnet *);
229 void	if_idxmap_insert(struct ifnet *);
230 void	if_idxmap_remove(struct ifnet *);
231 
232 TAILQ_HEAD(, ifg_group) ifg_head =
233     TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */
234 
235 LIST_HEAD(, if_clone) if_cloners =
236     LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
237 int if_cloners_count;	/* [I] number of clonable interfaces */
238 
239 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
240 
241 /* hooks should only be added, deleted, and run from a process context */
242 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
243 void	if_hooks_run(struct task_list *);
244 
245 int	ifq_congestion;
246 
247 int		 netisr;
248 
249 struct softnet {
250 	char		 sn_name[16];
251 	struct taskq	*sn_taskq;
252 };
253 
254 #define	NET_TASKQ	4
255 struct softnet	softnets[NET_TASKQ];
256 
257 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
258 
259 /*
260  * Serialize socket operations to ensure no new sleeping points
261  * are introduced in IP output paths.
262  */
263 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
264 
265 /*
266  * Network interface utility routines.
267  */
268 void
ifinit(void)269 ifinit(void)
270 {
271 	unsigned int	i;
272 
273 	/*
274 	 * most machines boot with 4 or 5 interfaces, so size the initial map
275 	 * to accommodate this
276 	 */
277 	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
278 
279 	for (i = 0; i < NET_TASKQ; i++) {
280 		struct softnet *sn = &softnets[i];
281 		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
282 		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
283 		    TASKQ_MPSAFE);
284 		if (sn->sn_taskq == NULL)
285 			panic("unable to create network taskq %d", i);
286 	}
287 }
288 
289 static struct if_idxmap if_idxmap;
290 
291 /*
292  * XXXSMP: For `ifnetlist' modification both kernel and net locks
293  * should be taken. For read-only access only one lock of them required.
294  */
295 struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
296 
297 static inline unsigned int
if_idxmap_limit(struct ifnet ** if_map)298 if_idxmap_limit(struct ifnet **if_map)
299 {
300 	return ((uintptr_t)if_map[0]);
301 }
302 
303 static inline size_t
if_idxmap_usedidx_size(unsigned int limit)304 if_idxmap_usedidx_size(unsigned int limit)
305 {
306 	return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
307 }
308 
/*
 * Initialize the interface index map with room for `limit' entries.
 * Runs once from ifinit() before any interface attaches, so no
 * locking or SMR publication dance is required yet.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct ifnet **if_map;

	rw_init(&if_idxmap.lock, "idxmaplk");
	if_idxmap.serial = 1; /* skip ifidx 0 */

	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
	    M_WAITOK | M_ZERO);

	/* slot 0 carries the array length instead of an interface */
	if_map[0] = (struct ifnet *)(uintptr_t)limit;

	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
	    M_IFADDR, M_WAITOK | M_ZERO);
	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */

	/* this is called early so there's nothing to race with */
	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
}
329 
/*
 * Reserve an interface index for `ifp' and record it in if_index.
 * If the next serial falls outside the current map, the map is
 * doubled: live entries are copied (with a reference each) into the
 * new array, the new array is published, and the old one is torn
 * down via smr_call() once readers drain.  The old usedidx bitmap is
 * reused as the smr destructor cookie — if_idxmap_usedidx_size()
 * guarantees it is big enough for that.
 */
void
if_idxmap_alloc(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int limit;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	rw_enter_write(&if_idxmap.lock);

	if (++if_idxmap.count >= USHRT_MAX)
		panic("too many interfaces");

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
	limit = if_idxmap_limit(if_map);

	/* serial wraps at USHRT_MAX, matching the 16-bit index space */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= limit) {
		struct if_idxmap_dtor *dtor;
		struct ifnet **oif_map;
		unsigned int olimit;
		unsigned char *nusedidx;

		oif_map = if_map;
		olimit = limit;

		limit = olimit * 2;
		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
		    M_WAITOK | M_ZERO);
		if_map[0] = (struct ifnet *)(uintptr_t)limit;

		for (i = 1; i < olimit; i++) {
			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
			if (oifp == NULL)
				continue;

			/*
			 * nif_map isn't visible yet, so don't need
			 * SMR_PTR_SET_LOCKED and its membar.
			 */
			if_map[i] = if_ref(oifp);
		}

		nusedidx = malloc(if_idxmap_usedidx_size(limit),
		    M_IFADDR, M_WAITOK | M_ZERO);
		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));

		/* use the old usedidx bitmap as an smr_entry for the if_map */
		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
		if_idxmap.usedidx = nusedidx;

		/* publish the grown map before scheduling the teardown */
		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);

		dtor->map = oif_map;
		smr_init(&dtor->smr);
		smr_call(&dtor->smr, if_idxmap_free, dtor);
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && isclr(if_idxmap.usedidx, index))
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}
	KASSERT(index != 0 && index < limit);
	KASSERT(isclr(if_idxmap.usedidx, index));

	setbit(if_idxmap.usedidx, index);
	ifp->if_index = index;

	rw_exit_write(&if_idxmap.lock);
}
405 
/*
 * SMR callback: tear down a superseded interface index map once no
 * reader can still be traversing it.  Drops the per-entry references
 * taken in if_idxmap_alloc() when the map was copied, then frees the
 * array and the recycled-bitmap dtor cookie itself.
 */
void
if_idxmap_free(void *arg)
{
	struct if_idxmap_dtor *dtor = arg;
	struct ifnet **oif_map = dtor->map;
	unsigned int olimit = if_idxmap_limit(oif_map);
	unsigned int i;

	for (i = 1; i < olimit; i++)
		if_put(oif_map[i]);

	free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
	free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
}
420 
/*
 * Publish `ifp' in the map slot reserved by if_idxmap_alloc(), making
 * it visible to if_get(ifp->if_index).  The map takes its own
 * reference on the interface.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
	    "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
	    if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
	KASSERT(isset(if_idxmap.usedidx, index));

	/* commit */
	SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));

	rw_exit_write(&if_idxmap.lock);
}
442 
/*
 * Unpublish `ifp' from the index map and release its index for reuse.
 * The map's reference is only dropped after smr_barrier() guarantees
 * no SMR reader can still see the old pointer.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
	KASSERT(isset(if_idxmap.usedidx, index));

	SMR_PTR_SET_LOCKED(&if_map[index], NULL);

	if_idxmap.count--;
	clrbit(if_idxmap.usedidx, index);
	/* end of if_idxmap modifications */

	rw_exit_write(&if_idxmap.lock);

	/* wait out readers before dropping the map's reference */
	smr_barrier();
	if_put(ifp);
}
468 
469 /*
470  * Attach an interface to the
471  * list of "active" interfaces.
472  */
/*
 * Attach an interface to the
 * list of "active" interfaces.
 *
 * Final stage of attach: join the "all" group, hook up per-AF and pf
 * state, arm the slow timer, publish the interface in the index map
 * and announce its arrival on the routing socket.  Runs with the
 * exclusive net lock held (asserted below).
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	if_addgroup(ifp, IFG_ALL);

#ifdef INET6
	nd6_ifattach(ifp);
#endif

#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/*
	 * hand the tasks the index, not the pointer, so they can
	 * revalidate the interface with if_get() when they run.
	 */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
504 
505 /*
506  * Allocate the link level name for the specified interface.  This
507  * is an attachment helper.  It must be called after ifp->if_addrlen
508  * is initialized, which may not be the case when if_attach() is
509  * called.
510  */
/*
 * Allocate and install the AF_LINK sockaddr_dl for `ifp', sized for
 * the interface name plus if_addrlen bytes of link-level address.
 * ifp->if_addrlen must already be set; any previous sadl is released
 * first so link-type changes can simply call this again.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to the pointer-alignment boundary */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	/* name and link-level address share sdl_data; name comes first */
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
542 
543 /*
544  * Free the link level name for the specified interface.  This is
545  * a detach helper.  This is called from if_detach() or from
546  * link layer type specific detach functions.
547  */
548 void
if_free_sadl(struct ifnet * ifp)549 if_free_sadl(struct ifnet *ifp)
550 {
551 	if (ifp->if_sadl == NULL)
552 		return;
553 
554 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
555 	ifp->if_sadl = NULL;
556 }
557 
/*
 * Like if_attach(), but inserts the interface at the head of the
 * global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
567 
/*
 * Attach `ifp' to the system: common initialisation, insertion at
 * the tail of the global interface list, then final setup and
 * announcement under the net lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
577 
578 void
if_attach_queues(struct ifnet * ifp,unsigned int nqs)579 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
580 {
581 	struct ifqueue **map;
582 	struct ifqueue *ifq;
583 	int i;
584 
585 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
586 	KASSERT(nqs != 0);
587 
588 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
589 
590 	ifp->if_snd.ifq_softc = NULL;
591 	map[0] = &ifp->if_snd;
592 
593 	for (i = 1; i < nqs; i++) {
594 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
595 		ifq_init_maxlen(ifq, ifp->if_snd.ifq_maxlen);
596 		ifq_init(ifq, ifp, i);
597 		map[i] = ifq;
598 	}
599 
600 	ifp->if_ifqs = map;
601 	ifp->if_nifqs = nqs;
602 }
603 
/*
 * Give `ifp' an array of `niqs' receive (input) queues.  The builtin
 * if_rcv queue becomes slot 0 and niqs - 1 further queues are
 * allocated.  Counterpart to if_attach_queues() for the rx side.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
627 
/*
 * Initialisation shared by if_attach() and if_attachhead(): address
 * and group lists, start-routine compatibility shim, index
 * allocation, default single tx/rx queue setup, hook lists and
 * default method pointers.  Runs before the interface is linked into
 * any global list.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	/*
	 * non-MPSAFE drivers provide if_start and get the compat
	 * shim; MPSAFE drivers must provide if_qstart instead.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	if_idxmap_alloc(ifp);

	/* default to a single transmit queue backed by if_snd */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* default to a single receive queue backed by if_rcv */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* fill in defaults for methods the driver did not provide */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
678 
/*
 * Switch the queueing discipline (ifq_ops) used on `ifp'.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
693 
/*
 * Kick an interface's transmit path.  Only valid for drivers using
 * the non-MPSAFE compat shim (asserted), where the single if_snd
 * queue maps 1:1 to the driver's if_start routine.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * qstart shim for drivers that only implement if_start: map the
 * queue back to its interface and call the driver start routine
 * under the kernel lock at splnet, as legacy drivers expect.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
721 
/*
 * Queue a packet for transmission on `ifp'.  Handles pf packet delay
 * and bridge member diversion before handing the mbuf to the
 * interface's enqueue method (if_enqueue_ifq() unless the driver
 * installed its own).  Consumes `m'; returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	/* stale rx timestamps must not leak into the tx path */
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets already processed by the bridge */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
747 
748 int
if_enqueue_ifq(struct ifnet * ifp,struct mbuf * m)749 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
750 {
751 	struct ifqueue *ifq = &ifp->if_snd;
752 	int error;
753 
754 	if (ifp->if_nifqs > 1) {
755 		unsigned int idx;
756 
757 		/*
758 		 * use the operations on the first ifq to pick which of
759 		 * the array gets this mbuf.
760 		 */
761 
762 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
763 		ifq = ifp->if_ifqs[idx];
764 	}
765 
766 	error = ifq_enqueue(ifq, m);
767 	if (error)
768 		return (error);
769 
770 	ifq_start(ifq);
771 
772 	return (0);
773 }
774 
/*
 * Hand a list of received packets to the stack via the interface's
 * first input queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
780 
/*
 * Feed a locally generated packet back into the stack as if it had
 * been received on `ifp'.  Used for loopback delivery and for
 * duplicating broadcast/multicast packets on SIMPLEX interfaces.
 * Consumes `m' (freed on error); returns 0 or an errno.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags, keepcksum;
	uint16_t keepmss;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	/*
	 * Preserve outgoing checksum flags, in case the packet is
	 * forwarded to another interface.  Then the checksum, which
	 * is now incorrect, will be calculated before sending.
	 */
	keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
	    M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
	    M_TCP_TSO);
	keepmss = m->m_pkthdr.ph_mss;
	/* rebuild the packet header from scratch, then restore the bits */
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.csum_flags = keepcksum;
	m->m_pkthdr.ph_mss = keepmss;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* oversized TSO packets only pass if the interface could do TSO */
	if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
		if (ifp->if_mtu > 0 &&
		    ((af == AF_INET &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
		    (af == AF_INET6 &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
			tcpstat_inc(tcps_inswlro);
			tcpstat_add(tcps_inpktlro,
			    (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
		} else {
			tcpstat_inc(tcps_inbadlro);
			m_freem(m);
			return (EPROTONOSUPPORT);
		}
	}

	/*
	 * a checksum we would have generated on output is by
	 * definition good, so mark it as verified for the input side.
	 */
	if (ISSET(keepcksum, M_TCP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_UDP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_ICMP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_IN_OK;

	/* do not count multicast loopback and simplex interfaces */
	if (ISSET(ifp->if_flags, IFF_LOOPBACK)) {
		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
		    m->m_pkthdr.len);
	}

	switch (af) {
	case AF_INET:
		if (ISSET(keepcksum, M_IPV4_CSUM_OUT))
			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
872 
873 int
if_output_ml(struct ifnet * ifp,struct mbuf_list * ml,struct sockaddr * dst,struct rtentry * rt)874 if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
875     struct sockaddr *dst, struct rtentry *rt)
876 {
877 	struct mbuf *m;
878 	int error = 0;
879 
880 	while ((m = ml_dequeue(ml)) != NULL) {
881 		error = ifp->if_output(ifp, m, dst, rt);
882 		if (error)
883 			break;
884 	}
885 	if (error)
886 		ml_purge(ml);
887 
888 	return error;
889 }
890 
/*
 * Output helper for possibly-oversized TCP packets.  First lets
 * tcp_if_output_tso() send (or software-chop) the segment; if a
 * packet no larger than `mtu' remains, finalise its checksums and
 * send it here.  On return *mp is NULL if the packet was consumed,
 * otherwise it still needs fragmentation or dropping by the caller.
 */
int
if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
    struct rtentry *rt, u_int mtu)
{
	uint32_t ifcap;
	int error;

	switch (dst->sa_family) {
	case AF_INET:
		ifcap = IFCAP_TSOv4;
		break;
#ifdef INET6
	case AF_INET6:
		ifcap = IFCAP_TSOv6;
		break;
#endif
	default:
		unhandled_af(dst->sa_family);
	}

	/*
	 * Try to send with TSO first.  When forwarding LRO may set
	 * maximum segment size in mbuf header.  Chop TCP segment
	 * even if it would fit interface MTU to preserve maximum
	 * path MTU.
	 */
	error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
	if (error || *mp == NULL)
		return error;

	if ((*mp)->m_pkthdr.len <= mtu) {
		switch (dst->sa_family) {
		case AF_INET:
			in_hdr_cksum_out(*mp, ifp);
			in_proto_cksum_out(*mp, ifp);
			break;
#ifdef INET6
		case AF_INET6:
			in6_proto_cksum_out(*mp, ifp);
			break;
#endif
		}
		error = ifp->if_output(ifp, *mp, dst, rt);
		*mp = NULL;
		return error;
	}

	/* mp still contains mbuf that has to be fragmented or dropped. */
	return 0;
}
941 
/*
 * Drain `mq' out through the interface and subtract the number of
 * packets that left the queue from `*total' (a counter shared with
 * the enqueuing side, hence the atomic update).  Packets that ended
 * up back on the queue during transmission are purged.
 */
int
if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
    struct sockaddr *dst, struct rtentry *rt)
{
	struct mbuf_list ml;
	unsigned int len;
	int error;

	mq_delist(mq, &ml);
	len = ml_len(&ml);
	error = if_output_ml(ifp, &ml, dst, rt);

	/* XXXSMP we also discard if other CPU enqueues */
	if (mq_len(mq) > 0) {
		/* mbuf is back in queue. Discard. */
		atomic_sub_int(total, len + mq_purge(mq));
	} else
		atomic_sub_int(total, len);

	return error;
}
963 
964 int
if_output_local(struct ifnet * ifp,struct mbuf * m,sa_family_t af)965 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
966 {
967 	struct ifiqueue *ifiq;
968 	unsigned int flow = 0;
969 
970 	m->m_pkthdr.ph_family = af;
971 	m->m_pkthdr.ph_ifidx = ifp->if_index;
972 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
973 
974 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
975 		flow = m->m_pkthdr.ph_flowid;
976 
977 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
978 
979 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
980 }
981 
/*
 * Run a list of received packets through the interface's protocol
 * input routine.  Called from the softnet threads; consumes `ml'.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces carry no useful hardware entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the shared netlock for packet processing in the softnet
	 * threads.  Packets can regrab the exclusive lock via queues.
	 * ioctl, sysctl, and socket syscall may use shared lock if access is
	 * read only or MP safe.  Usually they hold the exclusive net lock.
	 */

	NET_LOCK_SHARED();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK_SHARED();
}
1005 
/*
 * Input path for virtual/pseudo interfaces: tag the mbuf with the
 * interface, count it, give bpf a look, then hand it to if_input
 * unless the interface is in monitor mode.  Consumes `m'.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means the bpf filter dropped it */
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
		(*ifp->if_input)(ifp, m);
	else
		m_freem(m);
}
1038 
/*
 * Legacy soft-interrupt dispatcher: run under the exclusive net lock,
 * clear the pending bits in `netisr' and call the matching protocol
 * interrupt handlers until no bits remain set.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP))
			arpintr();
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX))
			pipexintr();
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		t |= n;
	}

	NET_UNLOCK();
}
1093 
/*
 * Run every task on a hook list (detach or link-state hooks).  The
 * hooks mutex is dropped around each callback, so an on-stack cursor
 * task (recognizable by its NULL t_func) is inserted after the
 * current entry to keep our position stable while other threads may
 * insert or remove entries concurrently.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };	/* place holder */
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		/* Copy the work out; t may go away once we drop the mutex. */
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
1122 
/*
 * Make an interface unreachable by new lookups and wait until all
 * existing references have been released, so teardown may proceed
 * without other CPUs holding the ifnet.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
1137 
/*
 * Run the detach hooks so code stacked on this interface can undo
 * its changes before the interface is torn down (see if_detach()).
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1151 
/*
 * Register task `t' to run when the interface is detached.  Hooks
 * must be inserted at the head so that if_hooks_run() executes them
 * in reverse registration order (see the comment in if_deactivate()).
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1159 
/*
 * Unregister a detach hook previously added with if_detachhook_add().
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1167 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.  The ordering matters: hooks run first, then the
 * interface becomes unreachable, then per-protocol and per-subsystem
 * state is torn down, and the queues are freed last.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	/* Route further transmit attempts to the detached stub. */
	ifp->if_qstart = if_detached_qstart;

	/* Wait until the start routines finished. */
	ifq_barrier(&ifp->if_snd);
	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* Stub out ioctl so late callers get an error, not a crash. */
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	/* Forget this interface if it was the revarp boot interface. */
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Leave every interface group this interface joined. */
	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}
	splx(s);
	NET_UNLOCK();

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

#ifdef INET6
	nd6_ifdetach(ifp);
#endif

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/*
	 * Destroy all transmit queues.  If the queue array is not the
	 * default one embedded in if_snd, slots 1..n-1 and the array
	 * itself were allocated separately and must be freed.
	 */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* Likewise for the receive queues. */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1278 
1279 /*
1280  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1281  */
1282 int
if_isconnected(const struct ifnet * ifp0,unsigned int ifidx)1283 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1284 {
1285 	struct ifnet *ifp;
1286 	int connected = 0;
1287 
1288 	ifp = if_get(ifidx);
1289 	if (ifp == NULL)
1290 		return (0);
1291 
1292 	if (ifp0->if_index == ifp->if_index)
1293 		connected = 1;
1294 
1295 #if NBRIDGE > 0
1296 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1297 		connected = 1;
1298 #endif
1299 #if NCARP > 0
1300 	if ((ifp0->if_type == IFT_CARP &&
1301 	    ifp0->if_carpdevidx == ifp->if_index) ||
1302 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1303 		connected = 1;
1304 #endif
1305 
1306 	if_put(ifp);
1307 	return (connected);
1308 }
1309 
/*
 * Create a clone network interface.  Returns 0, EINVAL for an
 * unparseable name, EEXIST if the interface already exists, or the
 * cloner's error.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	/* Split "name" into a cloner and a unit number. */
	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	/* Serialize clone creation and destruction. */
	rw_enter_write(&if_cloners_lock);

	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* Look the interface up again; the cloner attached it itself. */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	/* Every clone joins the group named after its cloner. */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	if_put(ifp);	/* ifp may be NULL here; if_put() tolerates that */

	return (ret);
}
1347 
/*
 * Destroy a clone network interface.  Returns 0, EINVAL for an
 * unparseable name, EOPNOTSUPP if the cloner cannot destroy,
 * ENXIO if no such interface exists, or the cloner's error.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* Cloners without a destroy handler cannot be destroyed. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	/*
	 * NOTE(review): no if_ref() is taken on ifp; presumably holding
	 * if_cloners_lock prevents a concurrent destroy — confirm.
	 */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Bring the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1390 
1391 /*
1392  * Look up a network interface cloner.
1393  */
1394 struct if_clone *
if_clone_lookup(const char * name,int * unitp)1395 if_clone_lookup(const char *name, int *unitp)
1396 {
1397 	struct if_clone *ifc;
1398 	const char *cp;
1399 	int unit;
1400 
1401 	/* separate interface name from unit */
1402 	for (cp = name;
1403 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1404 	    cp++)
1405 		continue;
1406 
1407 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1408 		return (NULL);	/* No name or unit number */
1409 
1410 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1411 		return (NULL);	/* unit number 0 padded */
1412 
1413 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1414 		if (strlen(ifc->ifc_name) == cp - name &&
1415 		    !strncmp(name, ifc->ifc_name, cp - name))
1416 			break;
1417 	}
1418 
1419 	if (ifc == NULL)
1420 		return (NULL);
1421 
1422 	unit = 0;
1423 	while (cp - name < IFNAMSIZ && *cp) {
1424 		if (*cp < '0' || *cp > '9' ||
1425 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1426 			/* Bogus unit number. */
1427 			return (NULL);
1428 		}
1429 		unit = (unit * 10) + (*cp++ - '0');
1430 	}
1431 
1432 	if (unitp != NULL)
1433 		*unitp = unit;
1434 	return (ifc);
1435 }
1436 
/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1453 
1454 /*
1455  * Provide list of interface cloners to userspace.
1456  */
1457 int
if_clone_list(struct if_clonereq * ifcr)1458 if_clone_list(struct if_clonereq *ifcr)
1459 {
1460 	char outbuf[IFNAMSIZ], *dst;
1461 	struct if_clone *ifc;
1462 	int count, error = 0;
1463 
1464 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1465 		/* Just asking how many there are. */
1466 		ifcr->ifcr_total = if_cloners_count;
1467 		return (0);
1468 	}
1469 
1470 	if (ifcr->ifcr_count < 0)
1471 		return (EINVAL);
1472 
1473 	ifcr->ifcr_total = if_cloners_count;
1474 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1475 
1476 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1477 		if (count == 0)
1478 			break;
1479 		bzero(outbuf, sizeof outbuf);
1480 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1481 		error = copyout(outbuf, dst, IFNAMSIZ);
1482 		if (error)
1483 			break;
1484 		count--;
1485 		dst += IFNAMSIZ;
1486 	}
1487 
1488 	return (error);
1489 }
1490 
/*
 * set queue congestion marker
 */
void
if_congestion(void)
{
	extern int ticks;

	/* Record when congestion was seen; if_congested() ages it out. */
	ifq_congestion = ticks;
}
1501 
/*
 * Return non-zero if the congestion marker was set within the last
 * hz/100 ticks (see if_congestion()).
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	/* Negative difference means the tick counter wrapped; reset. */
	if (diff < 0) {
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1516 
/*
 * Sockaddr equality: compares a2 against a1 over a1's full sa_len
 * bytes, i.e. including the sa_len/sa_family header.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1520 
/*
 * Locate an interface based on a complete address.  Only interfaces
 * in the routing domain of `rtableid' are considered.  Returns the
 * matching ifaddr or NULL.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	NET_ASSERT_LOCKED();

	/* Map the routing table to its routing domain. */
	rdomain = rtable_l2(rtableid);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			/* Whole-sockaddr comparison, see equal(). */
			if (equal(addr, ifa->ifa_addr)) {
				return (ifa);
			}
		}
	}
	return (NULL);
}
1549 
/*
 * Locate the point to point interface with a given destination address.
 * Despite its name, the `rdomain' argument arrives as a routing table
 * id and is mapped to its routing domain below.
 */
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rdomain);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				/* Skip other families and unset dstaddrs. */
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr)) {
					return (ifa);
				}
			}
		}
	}
	return (NULL);
}
1578 
/*
 * Find an interface address specific to an interface best matching
 * a given address.  Falls back to the first address of the right
 * family if no exact or prefix match is found.
 */
struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	const char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* family-match fallback */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first address of the right family. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			/* No netmask: require an exact (dst)addr match. */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Walk addr XOR ifa_addr under the netmask, byte by byte
		 * over the netmask's sa_data; reaching the end without a
		 * masked mismatch means addr is on this prefix.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1617 
/*
 * No-op if_rtrequest handler for interfaces that need no per-route
 * setup or teardown.
 */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1622 
/*
 * Default action when installing a local route on a point-to-point
 * interface.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get special treatment here. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address this route was added for. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Require an address of the same family on the rdomain's
		 * loopback interface before adjusting the route flags.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		/* Local routes carry no link-level info. */
		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1669 
/*
 * bpf(4) tap routine for point-to-point interfaces: taps the packet
 * with its address family (m->m_pkthdr.ph_family) prepended.  Without
 * bpf compiled in it returns 0, which callers treat as "keep the
 * packet" (see if_vinput()).
 */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	return (0);
#endif
}
1679 
1680 void
p2p_input(struct ifnet * ifp,struct mbuf * m)1681 p2p_input(struct ifnet *ifp, struct mbuf *m)
1682 {
1683 	void (*input)(struct ifnet *, struct mbuf *);
1684 
1685 	switch (m->m_pkthdr.ph_family) {
1686 	case AF_INET:
1687 		input = ipv4_input;
1688 		break;
1689 #ifdef INET6
1690 	case AF_INET6:
1691 		input = ipv6_input;
1692 		break;
1693 #endif
1694 #ifdef MPLS
1695 	case AF_MPLS:
1696 		input = mpls_input;
1697 		break;
1698 #endif
1699 	default:
1700 		m_freem(m);
1701 		return;
1702 	}
1703 
1704 	(*input)(ifp, m);
1705 }
1706 
/*
 * Bring down all interfaces
 */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Also tell the driver, through its SIOCSIFFLAGS handler. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1726 
/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Discard anything still queued for transmission. */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1742 
/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1763 
/*
 * Notify userland, the routing table and hooks owner of
 * a link-state transition.  Task wrapper: resolves the interface
 * index (the interface may already be gone) and runs if_linkstate()
 * under both the net and kernel locks.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	NET_LOCK();
	KERNEL_LOCK();

	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);	/* if_put(NULL) is a no-op */

	KERNEL_UNLOCK();
	NET_UNLOCK();
}
1785 
/*
 * Propagate a link-state change: send a routing message, let the
 * routing table react, then run the registered link-state hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Skip routing-table updates while the system is panicking. */
	if (panicstr == NULL) {
		rtm_ifchg(ifp);
		rt_if_track(ifp);
	}

	if_hooks_run(&ifp->if_linkstatehooks);
}
1798 
/*
 * Register task `t' to run on every link-state change.  Inserted at
 * the head, so if_hooks_run() executes hooks in reverse registration
 * order (same convention as the detach hooks).
 */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1806 
/*
 * Unregister a hook previously added with if_linkstatehook_add().
 */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1814 
/*
 * Schedule a link state change task.  The actual work happens in
 * if_linkstate_task() on the interface's net taskq.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1823 
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* On expiry defer the watchdog call to a task. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm while a watchdog routine remains installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1842 
/*
 * Task that invokes the driver's watchdog routine; scheduled from
 * if_slowtimo() when the transmit timer expires.  Runs with the
 * kernel lock held at splnet.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may have been detached in the meantime. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1863 
1864 /*
1865  * Map interface name to interface structure pointer.
1866  */
1867 struct ifnet *
if_unit(const char * name)1868 if_unit(const char *name)
1869 {
1870 	struct ifnet *ifp;
1871 
1872 	KERNEL_ASSERT_LOCKED();
1873 
1874 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1875 		if (strcmp(ifp->if_xname, name) == 0) {
1876 			if_ref(ifp);
1877 			return (ifp);
1878 		}
1879 	}
1880 
1881 	return (NULL);
1882 }
1883 
/*
 * Map interface index to interface structure pointer.  The returned
 * ifnet, if any, holds a reference the caller must drop with if_put().
 */
struct ifnet *
if_get(unsigned int index)
{
	struct ifnet **if_map;
	struct ifnet *ifp = NULL;

	/* Index 0 never names a valid interface. */
	if (index == 0)
		return (NULL);

	/* Lockless lookup in the SMR-protected index map. */
	smr_read_enter();
	if_map = SMR_PTR_GET(&if_idxmap.map);
	if (index < if_idxmap_limit(if_map)) {
		ifp = SMR_PTR_GET(&if_map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take the reference inside the SMR section. */
			if_ref(ifp);
		}
	}
	smr_read_leave();

	return (ifp);
}
1909 
/*
 * Take an additional reference on an interface the caller already
 * holds safely.  Returns ifp for convenient call chaining.
 */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1917 
1918 void
if_put(struct ifnet * ifp)1919 if_put(struct ifnet *ifp)
1920 {
1921 	if (ifp == NULL)
1922 		return;
1923 
1924 	refcnt_rele_wake(&ifp->if_refcnt);
1925 }
1926 
/*
 * Set the link-layer address of an Ethernet-style interface, updating
 * both the driver's copy and the interface's sockaddr_dl.
 * NOTE(review): ifp is cast to struct arpcom unconditionally; callers
 * such as the SIOCSIFLLADDR handler gate this on the interface type.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1938 
/*
 * Create routing domain `rdomain', including its mandatory loopback
 * interface "lo<rdomain>".  `ifp' is the interface the caller intends
 * to move into the new rdomain; it is only used to tolerate EEXIST
 * when the caller itself is that loopback.  Returns 0 or an errno.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	/* A new rdomain must start out with an empty routing table. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when the caller is the loopback itself. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Anchor the rdomain on its loopback interface. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1968 
/*
 * Move an interface into routing domain `rdomain'.  The target rtable
 * must already exist and be the head of a routing domain (see
 * if_createrdomain()).  A loopback interface that anchors its current
 * rdomain may not be moved (EPERM).  Returns 0 or an errno.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The rdomain's own loopback must stay where it is. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	/* ENOTTY just means the driver has no handler; not an error. */
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
2032 
2033 /*
2034  * Interface ioctls.
2035  */
2036 int
ifioctl(struct socket * so,u_long cmd,caddr_t data,struct proc * p)2037 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
2038 {
2039 	struct ifnet *ifp;
2040 	struct ifreq *ifr = (struct ifreq *)data;
2041 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
2042 	struct if_afreq *ifar = (struct if_afreq *)data;
2043 	char ifdescrbuf[IFDESCRSIZE];
2044 	char ifrtlabelbuf[RTLABEL_LEN];
2045 	int s, error = 0, oif_xflags;
2046 	size_t bytesdone;
2047 	unsigned short oif_flags;
2048 
2049 	switch (cmd) {
2050 	case SIOCIFCREATE:
2051 		if ((error = suser(p)) != 0)
2052 			return (error);
2053 		KERNEL_LOCK();
2054 		error = if_clone_create(ifr->ifr_name, 0);
2055 		KERNEL_UNLOCK();
2056 		return (error);
2057 	case SIOCIFDESTROY:
2058 		if ((error = suser(p)) != 0)
2059 			return (error);
2060 		KERNEL_LOCK();
2061 		error = if_clone_destroy(ifr->ifr_name);
2062 		KERNEL_UNLOCK();
2063 		return (error);
2064 	case SIOCSIFGATTR:
2065 		if ((error = suser(p)) != 0)
2066 			return (error);
2067 		KERNEL_LOCK();
2068 		NET_LOCK();
2069 		error = if_setgroupattribs(data);
2070 		NET_UNLOCK();
2071 		KERNEL_UNLOCK();
2072 		return (error);
2073 	case SIOCGIFCONF:
2074 	case SIOCIFGCLONERS:
2075 	case SIOCGIFGMEMB:
2076 	case SIOCGIFGATTR:
2077 	case SIOCGIFGLIST:
2078 	case SIOCGIFFLAGS:
2079 	case SIOCGIFXFLAGS:
2080 	case SIOCGIFMETRIC:
2081 	case SIOCGIFMTU:
2082 	case SIOCGIFHARDMTU:
2083 	case SIOCGIFDATA:
2084 	case SIOCGIFDESCR:
2085 	case SIOCGIFRTLABEL:
2086 	case SIOCGIFPRIORITY:
2087 	case SIOCGIFRDOMAIN:
2088 	case SIOCGIFGROUP:
2089 	case SIOCGIFLLPRIO:
2090 		error = ifioctl_get(cmd, data);
2091 		return (error);
2092 	}
2093 
2094 	KERNEL_LOCK();
2095 
2096 	ifp = if_unit(ifr->ifr_name);
2097 	if (ifp == NULL) {
2098 		KERNEL_UNLOCK();
2099 		return (ENXIO);
2100 	}
2101 	oif_flags = ifp->if_flags;
2102 	oif_xflags = ifp->if_xflags;
2103 
2104 	switch (cmd) {
2105 	case SIOCIFAFATTACH:
2106 	case SIOCIFAFDETACH:
2107 		if ((error = suser(p)) != 0)
2108 			break;
2109 		NET_LOCK();
2110 		switch (ifar->ifar_af) {
2111 		case AF_INET:
2112 			/* attach is a noop for AF_INET */
2113 			if (cmd == SIOCIFAFDETACH)
2114 				in_ifdetach(ifp);
2115 			break;
2116 #ifdef INET6
2117 		case AF_INET6:
2118 			if (cmd == SIOCIFAFATTACH)
2119 				error = in6_ifattach(ifp);
2120 			else
2121 				in6_ifdetach(ifp);
2122 			break;
2123 #endif /* INET6 */
2124 		default:
2125 			error = EAFNOSUPPORT;
2126 		}
2127 		NET_UNLOCK();
2128 		break;
2129 
2130 	case SIOCSIFXFLAGS:
2131 		if ((error = suser(p)) != 0)
2132 			break;
2133 
2134 		NET_LOCK();
2135 #ifdef INET6
2136 		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
2137 		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
2138 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
2139 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
2140 			error = in6_ifattach(ifp);
2141 			if (error != 0) {
2142 				NET_UNLOCK();
2143 				break;
2144 			}
2145 		}
2146 
2147 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2148 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2149 			ifp->if_xflags |= IFXF_INET6_NOSOII;
2150 
2151 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2152 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2153 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
2154 
2155 #endif	/* INET6 */
2156 
2157 #ifdef MPLS
2158 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
2159 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
2160 			s = splnet();
2161 			ifp->if_xflags |= IFXF_MPLS;
2162 			ifp->if_ll_output = ifp->if_output;
2163 			ifp->if_output = mpls_output;
2164 			splx(s);
2165 		}
2166 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
2167 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
2168 			s = splnet();
2169 			ifp->if_xflags &= ~IFXF_MPLS;
2170 			ifp->if_output = ifp->if_ll_output;
2171 			ifp->if_ll_output = NULL;
2172 			splx(s);
2173 		}
2174 #endif	/* MPLS */
2175 
2176 #ifndef SMALL_KERNEL
2177 		if (ifp->if_capabilities & IFCAP_WOL) {
2178 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
2179 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
2180 				s = splnet();
2181 				ifp->if_xflags |= IFXF_WOL;
2182 				error = ifp->if_wol(ifp, 1);
2183 				splx(s);
2184 			}
2185 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
2186 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2187 				s = splnet();
2188 				ifp->if_xflags &= ~IFXF_WOL;
2189 				error = ifp->if_wol(ifp, 0);
2190 				splx(s);
2191 			}
2192 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2193 			ifr->ifr_flags &= ~IFXF_WOL;
2194 			error = ENOTSUP;
2195 		}
2196 #endif
2197 		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
2198 		    ISSET(ifp->if_xflags, IFXF_LRO))
2199 			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));
2200 
2201 		if (error == 0)
2202 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2203 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2204 
2205 		if (!ISSET(ifp->if_flags, IFF_UP) &&
2206 		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
2207 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
2208 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
2209 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
2210 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
2211 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
2212 			ifr->ifr_flags = ifp->if_flags | IFF_UP;
2213 			goto forceup;
2214 		}
2215 
2216 		NET_UNLOCK();
2217 		break;
2218 
2219 	case SIOCSIFFLAGS:
2220 		if ((error = suser(p)) != 0)
2221 			break;
2222 
2223 		NET_LOCK();
2224 forceup:
2225 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2226 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
2227 		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
2228 		if (error != 0) {
2229 			ifp->if_flags = oif_flags;
2230 			if (cmd == SIOCSIFXFLAGS)
2231 				ifp->if_xflags = oif_xflags;
2232 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
2233 			s = splnet();
2234 			if (ISSET(ifp->if_flags, IFF_UP))
2235 				if_up(ifp);
2236 			else
2237 				if_down(ifp);
2238 			splx(s);
2239 		}
2240 		NET_UNLOCK();
2241 		break;
2242 
2243 	case SIOCSIFMETRIC:
2244 		if ((error = suser(p)) != 0)
2245 			break;
2246 		NET_LOCK();
2247 		ifp->if_metric = ifr->ifr_metric;
2248 		NET_UNLOCK();
2249 		break;
2250 
2251 	case SIOCSIFMTU:
2252 		if ((error = suser(p)) != 0)
2253 			break;
2254 		NET_LOCK();
2255 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2256 		NET_UNLOCK();
2257 		if (error == 0)
2258 			rtm_ifchg(ifp);
2259 		break;
2260 
2261 	case SIOCSIFDESCR:
2262 		if ((error = suser(p)) != 0)
2263 			break;
2264 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2265 		    IFDESCRSIZE, &bytesdone);
2266 		if (error == 0) {
2267 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2268 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2269 		}
2270 		break;
2271 
2272 	case SIOCSIFRTLABEL:
2273 		if ((error = suser(p)) != 0)
2274 			break;
2275 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2276 		    RTLABEL_LEN, &bytesdone);
2277 		if (error == 0) {
2278 			rtlabel_unref(ifp->if_rtlabelid);
2279 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2280 		}
2281 		break;
2282 
2283 	case SIOCSIFPRIORITY:
2284 		if ((error = suser(p)) != 0)
2285 			break;
2286 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2287 			error = EINVAL;
2288 			break;
2289 		}
2290 		ifp->if_priority = ifr->ifr_metric;
2291 		break;
2292 
2293 	case SIOCSIFRDOMAIN:
2294 		if ((error = suser(p)) != 0)
2295 			break;
2296 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2297 		if (!error || error == EEXIST) {
2298 			NET_LOCK();
2299 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2300 			NET_UNLOCK();
2301 		}
2302 		break;
2303 
2304 	case SIOCAIFGROUP:
2305 		if ((error = suser(p)))
2306 			break;
2307 		NET_LOCK();
2308 		error = if_addgroup(ifp, ifgr->ifgr_group);
2309 		if (error == 0) {
2310 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2311 			if (error == ENOTTY)
2312 				error = 0;
2313 		}
2314 		NET_UNLOCK();
2315 		break;
2316 
2317 	case SIOCDIFGROUP:
2318 		if ((error = suser(p)))
2319 			break;
2320 		NET_LOCK();
2321 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2322 		if (error == ENOTTY)
2323 			error = 0;
2324 		if (error == 0)
2325 			error = if_delgroup(ifp, ifgr->ifgr_group);
2326 		NET_UNLOCK();
2327 		break;
2328 
2329 	case SIOCSIFLLADDR:
2330 		if ((error = suser(p)))
2331 			break;
2332 		if ((ifp->if_sadl == NULL) ||
2333 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2334 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2335 			error = EINVAL;
2336 			break;
2337 		}
2338 		NET_LOCK();
2339 		switch (ifp->if_type) {
2340 		case IFT_ETHER:
2341 		case IFT_CARP:
2342 		case IFT_XETHER:
2343 		case IFT_ISO88025:
2344 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2345 			if (error == ENOTTY)
2346 				error = 0;
2347 			if (error == 0)
2348 				error = if_setlladdr(ifp,
2349 				    ifr->ifr_addr.sa_data);
2350 			break;
2351 		default:
2352 			error = ENODEV;
2353 		}
2354 
2355 		if (error == 0)
2356 			ifnewlladdr(ifp);
2357 		NET_UNLOCK();
2358 		if (error == 0)
2359 			rtm_ifchg(ifp);
2360 		break;
2361 
2362 	case SIOCSIFLLPRIO:
2363 		if ((error = suser(p)))
2364 			break;
2365 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2366 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2367 			error = EINVAL;
2368 			break;
2369 		}
2370 		NET_LOCK();
2371 		ifp->if_llprio = ifr->ifr_llprio;
2372 		NET_UNLOCK();
2373 		break;
2374 
2375 	case SIOCGIFSFFPAGE:
2376 		error = suser(p);
2377 		if (error != 0)
2378 			break;
2379 
2380 		error = if_sffpage_check(data);
2381 		if (error != 0)
2382 			break;
2383 
2384 		/* don't take NET_LOCK because i2c reads take a long time */
2385 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2386 		break;
2387 
2388 	case SIOCSIFMEDIA:
2389 		if ((error = suser(p)) != 0)
2390 			break;
2391 		/* FALLTHROUGH */
2392 	case SIOCGIFMEDIA:
2393 		/* net lock is not needed */
2394 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2395 		break;
2396 
2397 	case SIOCSETKALIVE:
2398 	case SIOCDIFPHYADDR:
2399 	case SIOCSLIFPHYADDR:
2400 	case SIOCSLIFPHYRTABLE:
2401 	case SIOCSLIFPHYTTL:
2402 	case SIOCSLIFPHYDF:
2403 	case SIOCSLIFPHYECN:
2404 	case SIOCADDMULTI:
2405 	case SIOCDELMULTI:
2406 	case SIOCSVNETID:
2407 	case SIOCDVNETID:
2408 	case SIOCSVNETFLOWID:
2409 	case SIOCSTXHPRIO:
2410 	case SIOCSRXHPRIO:
2411 	case SIOCSIFPAIR:
2412 	case SIOCSIFPARENT:
2413 	case SIOCDIFPARENT:
2414 	case SIOCSETMPWCFG:
2415 	case SIOCSETLABEL:
2416 	case SIOCDELLABEL:
2417 	case SIOCSPWE3CTRLWORD:
2418 	case SIOCSPWE3FAT:
2419 	case SIOCSPWE3NEIGHBOR:
2420 	case SIOCDPWE3NEIGHBOR:
2421 #if NBRIDGE > 0
2422 	case SIOCBRDGADD:
2423 	case SIOCBRDGDEL:
2424 	case SIOCBRDGSIFFLGS:
2425 	case SIOCBRDGSCACHE:
2426 	case SIOCBRDGADDS:
2427 	case SIOCBRDGDELS:
2428 	case SIOCBRDGSADDR:
2429 	case SIOCBRDGSTO:
2430 	case SIOCBRDGDADDR:
2431 	case SIOCBRDGFLUSH:
2432 	case SIOCBRDGADDL:
2433 	case SIOCBRDGSIFPROT:
2434 	case SIOCBRDGARL:
2435 	case SIOCBRDGFRL:
2436 	case SIOCBRDGSPRI:
2437 	case SIOCBRDGSHT:
2438 	case SIOCBRDGSFD:
2439 	case SIOCBRDGSMA:
2440 	case SIOCBRDGSIFPRIO:
2441 	case SIOCBRDGSIFCOST:
2442 	case SIOCBRDGSTXHC:
2443 	case SIOCBRDGSPROTO:
2444 #endif
2445 		if ((error = suser(p)) != 0)
2446 			break;
2447 		/* FALLTHROUGH */
2448 	default:
2449 		error = pru_control(so, cmd, data, ifp);
2450 		if (error != EOPNOTSUPP)
2451 			break;
2452 		switch (cmd) {
2453 		case SIOCAIFADDR:
2454 		case SIOCDIFADDR:
2455 		case SIOCSIFADDR:
2456 		case SIOCSIFNETMASK:
2457 		case SIOCSIFDSTADDR:
2458 		case SIOCSIFBRDADDR:
2459 #ifdef INET6
2460 		case SIOCAIFADDR_IN6:
2461 		case SIOCDIFADDR_IN6:
2462 #endif
2463 			error = suser(p);
2464 			break;
2465 		default:
2466 			error = 0;
2467 			break;
2468 		}
2469 		if (error)
2470 			break;
2471 		NET_LOCK();
2472 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2473 		NET_UNLOCK();
2474 		break;
2475 	}
2476 
2477 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
2478 		/* if_up() and if_down() already sent an update, skip here */
2479 		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
2480 			rtm_ifchg(ifp);
2481 	}
2482 
2483 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2484 		getmicrotime(&ifp->if_lastchange);
2485 
2486 	KERNEL_UNLOCK();
2487 
2488 	if_put(ifp);
2489 
2490 	return (error);
2491 }
2492 
/*
 * Read-only half of the interface ioctl path.
 *
 * Requests that do not name a single interface (SIOCGIFCONF,
 * SIOCIFGCLONERS and the group queries) are dispatched first and
 * return directly.  Everything else resolves the interface by name
 * and reads its state under the kernel lock and the shared net lock.
 * Returns 0 or an errno value (ENXIO if the interface does not exist).
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;

	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		/* cloner list does its own locking */
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_LOCK_SHARED();
		error = if_getgroupmembers(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		NET_LOCK_SHARED();
		error = if_getgrouplist(data);
		NET_UNLOCK_SHARED();
		return (error);
	}

	KERNEL_LOCK();

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}

	NET_LOCK_SHARED();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* reflect the send queue state in IFF_OACTIVE */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* kernel-internal xflags are hidden from userland */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		/* snapshot aggregated statistics, then copy out */
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		/* copy via a kernel buffer; ifr_data points at userland */
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid && rtlabel_id2name(ifp->if_rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() must only route known get-ioctls here */
		panic("invalid ioctl %lu", cmd);
	}

	NET_UNLOCK_SHARED();

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
2612 
2613 static int
if_sffpage_check(const caddr_t data)2614 if_sffpage_check(const caddr_t data)
2615 {
2616 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2617 
2618 	switch (sff->sff_addr) {
2619 	case IFSFF_ADDR_EEPROM:
2620 	case IFSFF_ADDR_DDM:
2621 		break;
2622 	default:
2623 		return (EINVAL);
2624 	}
2625 
2626 	return (0);
2627 }
2628 
2629 int
if_txhprio_l2_check(int hdrprio)2630 if_txhprio_l2_check(int hdrprio)
2631 {
2632 	switch (hdrprio) {
2633 	case IF_HDRPRIO_PACKET:
2634 		return (0);
2635 	default:
2636 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2637 			return (0);
2638 		break;
2639 	}
2640 
2641 	return (EINVAL);
2642 }
2643 
2644 int
if_txhprio_l3_check(int hdrprio)2645 if_txhprio_l3_check(int hdrprio)
2646 {
2647 	switch (hdrprio) {
2648 	case IF_HDRPRIO_PACKET:
2649 	case IF_HDRPRIO_PAYLOAD:
2650 		return (0);
2651 	default:
2652 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2653 			return (0);
2654 		break;
2655 	}
2656 
2657 	return (EINVAL);
2658 }
2659 
2660 int
if_rxhprio_l2_check(int hdrprio)2661 if_rxhprio_l2_check(int hdrprio)
2662 {
2663 	switch (hdrprio) {
2664 	case IF_HDRPRIO_PACKET:
2665 	case IF_HDRPRIO_OUTER:
2666 		return (0);
2667 	default:
2668 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2669 			return (0);
2670 		break;
2671 	}
2672 
2673 	return (EINVAL);
2674 }
2675 
2676 int
if_rxhprio_l3_check(int hdrprio)2677 if_rxhprio_l3_check(int hdrprio)
2678 {
2679 	switch (hdrprio) {
2680 	case IF_HDRPRIO_PACKET:
2681 	case IF_HDRPRIO_PAYLOAD:
2682 	case IF_HDRPRIO_OUTER:
2683 		return (0);
2684 	default:
2685 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2686 			return (0);
2687 		break;
2688 	}
2689 
2690 	return (EINVAL);
2691 }
2692 
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * If ifc_len is zero on entry only the space needed for the full
 * list is computed and returned in ifc_len.  Otherwise entries are
 * copied out until the user buffer runs out; a sockaddr longer than
 * struct sockaddr is appended inline after ifr_name, so entries are
 * variable-sized.  On return ifc_len holds the number of bytes
 * actually used.
 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
			struct sockaddr *sa;

			/* an address-less interface still gets one entry */
			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* account for oversized sockaddrs */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* no addresses: emit one entry with a zero addr */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					/* fits in the fixed-size ifreq */
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * Oversized sockaddr: copy the name,
					 * then the full sockaddr in place of
					 * ifr_addr, and advance past it.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* report how much of the buffer was consumed */
	ifc->ifc_len -= space;
	return (error);
}
2774 
/*
 * Attach a set of per-CPU packet/byte counters to the interface.
 * Must only be called once, before the counters are used.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2782 
/*
 * Release the per-CPU counters allocated by if_counters_alloc()
 * and clear the pointer so stale use is caught by the KASSERTs.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2791 
/*
 * Fill *data with a snapshot of the interface statistics: start from
 * the static if_data, then fold in the per-CPU counters (when
 * allocated) and the counters of every transmit and receive queue.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		/* coherent read of all per-CPU counters at once */
		counters_read(ifp->if_counters, counters, nitems(counters),
		    NULL);

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* add per-sendqueue statistics */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* add per-receive-queue statistics */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2831 
/*
 * Dummy functions replaced in ifnet during detach (if protocols decide to
 * fiddle with the if during detach).
 */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* the interface is gone; discard anything queued for transmit */
	ifq_purge(ifq);
}
2841 
2842 int
if_detached_ioctl(struct ifnet * ifp,u_long a,caddr_t b)2843 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2844 {
2845 	return ENODEV;
2846 }
2847 
/*
 * Create interface group without members
 *
 * Allocates a group carrying one reference owned by the caller,
 * registers it with pf and links it onto the global group list.
 * Returns NULL if no memory is available.
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_IFGROUP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 1;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2870 
/*
 * Add a group to an interface
 *
 * Group names must be non-empty, shorter than IFNAMSIZ and must not
 * end in a digit (such a name would be indistinguishable from an
 * interface name).  The group is created on first use; its refcount
 * counts member interfaces.  Returns 0, EINVAL, EEXIST or ENOMEM.
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;
	size_t			 namelen;

	namelen = strlen(groupname);
	if (namelen == 0 || namelen >= IFNAMSIZ ||
	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
		return (EINVAL);

	/* refuse a second membership in the same group */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* allocate both link records up front so failure is easy to undo */
	if ((ifgl = malloc(sizeof(*ifgl), M_IFGROUP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_IFGROUP, M_NOWAIT)) == NULL) {
		free(ifgl, M_IFGROUP, sizeof(*ifgl));
		return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL) {
		ifg = if_creategroup(groupname);
		if (ifg == NULL) {
			free(ifgl, M_IFGROUP, sizeof(*ifgl));
			free(ifgm, M_IFGROUP, sizeof(*ifgm));
			return (ENOMEM);
		}
	} else
		ifg->ifg_refcnt++;
	KASSERT(ifg->ifg_refcnt != 0);

	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname);
#endif

	return (0);
}
2926 
/*
 * Remove a group from an interface
 *
 * Unlinks the membership, notifies pf, and destroys the group when
 * the last reference is dropped.  Returns 0 or ENOENT if the
 * interface is not a member of the group.
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* find and drop this interface's member record in the group */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_IFGROUP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_delmember(groupname);
#endif

	/* last member gone: tear the group itself down */
	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_IFGROUP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_IFGROUP, sizeof(*ifgl));

	return (0);
}
2970 
/*
 * Stores all groups from an interface in memory pointed
 * to by data
 *
 * With ifgr_len == 0 this is a size query: the space required for
 * the full list is returned in ifgr_len.  Otherwise group names are
 * copied out until the caller's buffer is exhausted (EINVAL).
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		/* zero first so no kernel stack bytes leak to userland */
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
3006 
/*
 * Stores all members of a group in memory pointed to by data
 *
 * Same protocol as if_getgroup(): ifgr_len == 0 queries the size,
 * otherwise member interface names are copied out.  Returns ENOENT
 * if the group does not exist, EINVAL if the buffer is too small.
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		/* zero first so no kernel stack bytes leak to userland */
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
3048 
3049 int
if_getgroupattribs(caddr_t data)3050 if_getgroupattribs(caddr_t data)
3051 {
3052 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
3053 	struct ifg_group	*ifg;
3054 
3055 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
3056 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
3057 			break;
3058 	if (ifg == NULL)
3059 		return (ENOENT);
3060 
3061 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
3062 
3063 	return (0);
3064 }
3065 
/*
 * Adjust the carp demotion counter of a group.  The request carries
 * a signed delta; it is rejected if the accumulated counter would
 * leave [0, 0xff].  Every member interface is notified through its
 * ioctl handler.  Returns 0, ENOENT or EINVAL.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* keep the accumulated counter within its byte range */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* let each member react to the changed group attributes */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
3092 
/*
 * Stores all groups in memory pointed to by data
 *
 * Same protocol as if_getgroup(): ifgr_len == 0 queries the needed
 * size, otherwise all group names are copied out (EINVAL if the
 * buffer is too small).
 */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		/* zero first so no kernel stack bytes leak to userland */
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
3127 
/*
 * Route change hook: when a default route (destination is the
 * all-zeros address and the mask is empty or all-zeros) is added or
 * removed, rebuild the "egress" interface group.
 */
void
if_group_routechange(const struct sockaddr *dst, const struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin_const(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin_const(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6_const(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6_const(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
3149 
/*
 * Rebuild the "egress" interface group from scratch: remove every
 * current member, then add each interface that carries a default
 * route (IPv4 and, when INET6, IPv6) in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* drop all current members; _SAFE as if_delgroup() frees them */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* walk all IPv4 default routes */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* and all IPv6 default routes */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3199 
/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* remember old state so a driver failure can be rolled back */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* only the 0 -> 1 transition changes the flag */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* only the 1 -> 0 transition changes the flag */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* the driver only sees the new flags once the interface is up */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* driver refused: restore flags and refcount */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
3240 
/* Set/clear LRO flag and restart interface if needed. */
int
ifsetlro(struct ifnet *ifp, int on)
{
	struct ifreq ifrq;
	int error = 0;
	int s = splnet();
	struct if_parent parent;

	/* apply the setting to a parent interface first, if there is one */
	memset(&parent, 0, sizeof(parent));
	if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) {
		struct ifnet *ifp0 = if_unit(parent.ifp_parent);

		if (ifp0 != NULL) {
			/* recursion stops here: the parent has no parent */
			ifsetlro(ifp0, on);
			if_put(ifp0);
		}
	}

	if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
		error = ENOTSUP;
		goto out;
	}

	NET_ASSERT_LOCKED();	/* for ioctl */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
		/* refuse LRO while the port is part of a bridge */
		if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) {
			error = EBUSY;
			goto out;
		}
		SET(ifp->if_xflags, IFXF_LRO);
	} else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
		CLR(ifp->if_xflags, IFXF_LRO);
	else
		goto out;	/* no change, nothing to restart */

	/* restart interface */
	if (ISSET(ifp->if_flags, IFF_UP)) {
		/* go down for a moment... */
		CLR(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

		/* ... and up again */
		SET(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
 out:
	splx(s);

	return error;
}
3296 
/*
 * Append an address to the interface's address list.  Requires the
 * exclusive net lock since readers iterate under the shared lock.
 */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3303 
/*
 * Remove an address from the interface's address list.  Requires the
 * exclusive net lock; the caller keeps ownership of ifa.
 */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3310 
/*
 * Overwrite an address' broadcast address in place.  The new sockaddr
 * must have exactly the same length as the old one; resizing the
 * stored sockaddr is not supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3318 
#ifdef DDB
/* debug function, can be called from ddb> */
/*
 * Print every configured IPv4/IPv6 interface address together with
 * its interface name.  Other address families are printed as an
 * empty address (only " on <ifname>").
 */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			/* INET6_ADDRSTRLEN is large enough for both families */
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3350 
/*
 * React to a changed link layer address: cycle the interface flags
 * through the driver so it picks the new lladdr up, and regenerate
 * the IPv6 link-local address when we are not forwarding.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* push IFF_UP so the driver reinitializes with the new lladdr */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3397 
/*
 * Register a task on the interface's address hook list.  The list is
 * protected by if_hooks_mtx; see if_addrhooks_run() for execution.
 */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3405 
/*
 * Unregister a task from the interface's address hook list.  The
 * caller must have added it with if_addrhook_add().
 */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3413 
/*
 * Run all tasks registered on the interface's address hook list.
 */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3419 
3420 void
if_rxr_init(struct if_rxring * rxr,u_int lwm,u_int hwm)3421 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3422 {
3423 	extern int ticks;
3424 
3425 	memset(rxr, 0, sizeof(*rxr));
3426 
3427 	rxr->rxr_adjusted = ticks;
3428 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3429 	rxr->rxr_hwm = hwm;
3430 }
3431 
3432 static inline void
if_rxr_adjust_cwm(struct if_rxring * rxr)3433 if_rxr_adjust_cwm(struct if_rxring *rxr)
3434 {
3435 	extern int ticks;
3436 
3437 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3438 		return;
3439 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3440 		rxr->rxr_cwm++;
3441 
3442 	rxr->rxr_adjusted = ticks;
3443 }
3444 
3445 void
if_rxr_livelocked(struct if_rxring * rxr)3446 if_rxr_livelocked(struct if_rxring *rxr)
3447 {
3448 	extern int ticks;
3449 
3450 	if (ticks - rxr->rxr_adjusted >= 1) {
3451 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3452 			rxr->rxr_cwm--;
3453 
3454 		rxr->rxr_adjusted = ticks;
3455 	}
3456 }
3457 
3458 u_int
if_rxr_get(struct if_rxring * rxr,u_int max)3459 if_rxr_get(struct if_rxring *rxr, u_int max)
3460 {
3461 	extern int ticks;
3462 	u_int diff;
3463 
3464 	if (ticks - rxr->rxr_adjusted >= 1) {
3465 		/* we're free to try for an adjustment */
3466 		if_rxr_adjust_cwm(rxr);
3467 	}
3468 
3469 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3470 		return (0);
3471 
3472 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3473 	rxr->rxr_alive += diff;
3474 
3475 	return (diff);
3476 }
3477 
/*
 * Copy `t' receive ring descriptions out to a user supplied
 * if_rxrinfo.  At most as many entries as the caller asked for
 * (ifri_total on input) are written; ifri_total is rewritten to the
 * real total so userland can detect truncation.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* never write more entries than the user buffer can take */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3500 
/*
 * Export the state of a single rx ring to userland: build one
 * if_rxring_info record and hand it to if_rxr_info_ioctl() for the
 * copyout.  name may be NULL, in which case ifr_name stays empty.
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	/* zero-fill first: the whole struct is copied out to userland */
	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3517 
3518 /*
3519  * Network stack input queues.
3520  */
3521 
/*
 * Initialise a network stack input queue: an mbuf queue capped at
 * maxlen, protected at IPL_NET, paired with the netisr to schedule
 * when packets are enqueued (see niq_enqueue/niq_enlist).
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3528 
3529 int
niq_enqueue(struct niqueue * niq,struct mbuf * m)3530 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3531 {
3532 	int rv;
3533 
3534 	rv = mq_enqueue(&niq->ni_q, m);
3535 	if (rv == 0)
3536 		schednetisr(niq->ni_isr);
3537 	else
3538 		if_congestion();
3539 
3540 	return (rv);
3541 }
3542 
3543 int
niq_enlist(struct niqueue * niq,struct mbuf_list * ml)3544 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3545 {
3546 	int rv;
3547 
3548 	rv = mq_enlist(&niq->ni_q, ml);
3549 	if (rv == 0)
3550 		schednetisr(niq->ni_isr);
3551 	else
3552 		if_congestion();
3553 
3554 	return (rv);
3555 }
3556 
/*
 * Catch-all for switches over address families: reaching this means a
 * protocol case is missing, which is a programming error, so panic.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3562 
3563 struct taskq *
net_tq(unsigned int ifindex)3564 net_tq(unsigned int ifindex)
3565 {
3566 	struct softnet *sn;
3567 	static int nettaskqs;
3568 
3569 	if (nettaskqs == 0)
3570 		nettaskqs = min(NET_TASKQ, ncpus);
3571 
3572 	sn = &softnets[ifindex % nettaskqs];
3573 
3574 	return (sn->sn_taskq);
3575 }
3576 
/*
 * Put a barrier task on every softnet taskq and sleep (under wmesg)
 * until all of them have run.  Each barrier task releases one
 * reference on r when it executes; refcnt_finalize() blocks until the
 * count drains to zero.
 */
void
net_tq_barriers(const char *wmesg)
{
	struct task barriers[NET_TASKQ];
	struct refcnt r = REFCNT_INITIALIZER();
	int i;

	for (i = 0; i < nitems(barriers); i++) {
		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake, &r);
		/* take the reference before the task can possibly run */
		refcnt_take(&r);
		task_add(softnets[i].sn_taskq, &barriers[i]);
	}

	refcnt_finalize(&r, wmesg);
}
3592