xref: /openbsd/sys/net/if.c (revision d1df5f10)
1 /*	$OpenBSD: if.c,v 1.725 2025/01/25 10:53:36 mvs Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "ppp.h"
70 #include "pppoe.h"
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/mbuf.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/timeout.h>
78 #include <sys/protosw.h>
79 #include <sys/kernel.h>
80 #include <sys/ioctl.h>
81 #include <sys/domain.h>
82 #include <sys/task.h>
83 #include <sys/atomic.h>
84 #include <sys/percpu.h>
85 #include <sys/proc.h>
86 #include <sys/stdint.h>	/* uintptr_t */
87 #include <sys/rwlock.h>
88 #include <sys/smr.h>
89 
90 #include <net/if.h>
91 #include <net/if_dl.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/netisr.h>
95 
96 #include "vlan.h"
97 #if NVLAN > 0
98 #include <net/if_vlan_var.h>
99 #endif
100 
101 #include <netinet/in.h>
102 #include <netinet/if_ether.h>
103 #include <netinet/igmp.h>
104 #ifdef MROUTING
105 #include <netinet/ip_mroute.h>
106 #endif
107 #include <netinet/tcp.h>
108 #include <netinet/tcp_timer.h>
109 #include <netinet/tcp_var.h>
110 
111 #ifdef INET6
112 #include <netinet6/in6_var.h>
113 #include <netinet6/in6_ifattach.h>
114 #include <netinet6/nd6.h>
115 #include <netinet/ip6.h>
116 #include <netinet6/ip6_var.h>
117 #endif
118 
119 #ifdef MPLS
120 #include <netmpls/mpls.h>
121 #endif
122 
123 #if NBPFILTER > 0
124 #include <net/bpf.h>
125 #endif
126 
127 #if NBRIDGE > 0
128 #include <net/if_bridge.h>
129 #endif
130 
131 #if NCARP > 0
132 #include <netinet/ip_carp.h>
133 #endif
134 
135 #if NPF > 0
136 #include <net/pfvar.h>
137 #endif
138 
139 #include <sys/device.h>
140 
141 void	if_attachsetup(struct ifnet *);
142 void	if_attach_common(struct ifnet *);
143 void	if_remove(struct ifnet *);
144 int	if_createrdomain(int, struct ifnet *);
145 int	if_setrdomain(struct ifnet *, int);
146 void	if_slowtimo(void *);
147 
148 void	if_detached_qstart(struct ifqueue *);
149 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
150 
151 int	ifioctl_get(u_long, caddr_t);
152 int	ifconf(caddr_t);
153 static int
154 	if_sffpage_check(const caddr_t);
155 
156 int	if_getgroup(caddr_t, struct ifnet *);
157 int	if_getgroupmembers(caddr_t);
158 int	if_getgroupattribs(caddr_t);
159 int	if_setgroupattribs(caddr_t);
160 int	if_getgrouplist(caddr_t);
161 
162 void	if_linkstate(struct ifnet *);
163 void	if_linkstate_task(void *);
164 
165 int	if_clone_list(struct if_clonereq *);
166 struct if_clone	*if_clone_lookup(const char *, int *);
167 
168 int	if_group_egress_build(void);
169 
170 void	if_watchdog_task(void *);
171 
172 void	if_netisr(void *);
173 
174 #ifdef DDB
175 void	ifa_print_all(void);
176 #endif
177 
178 void	if_qstart_compat(struct ifqueue *);
179 
180 /*
181  * interface index map
182  *
183  * the kernel maintains a mapping of interface indexes to struct ifnet
184  * pointers.
185  *
186  * the map is an array of struct ifnet pointers prefixed by an if_map
187  * structure. the if_map structure stores the length of its array.
188  *
189  * as interfaces are attached to the system, the map is grown on demand
190  * up to USHRT_MAX entries.
191  *
192  * interface index 0 is reserved and represents no interface. this
193  * supports the use of the interface index as the scope for IPv6 link
194  * local addresses, where scope 0 means no scope has been specified.
195  * it also supports the use of interface index as the unique identifier
196  * for network interfaces in SNMP applications as per RFC2863. therefore
197  * if_get(0) returns NULL.
198  */
199 
200 struct ifnet *if_ref(struct ifnet *);
201 
202 /*
203  * struct if_idxmap
204  *
205  * infrastructure to manage updates and accesses to the current if_map.
206  *
207  * interface index 0 is special and represents "no interface", so we
208  * use the 0th slot in map to store the length of the array.
209  */
210 
/*
 * Bookkeeping for the current interface index map.  All fields are
 * modified under `lock'; `map' is additionally published for lockless
 * readers via SMR.
 */
struct if_idxmap {
	unsigned int		  serial;	/* next candidate index */
	unsigned int		  count;	/* attached interfaces */
	struct ifnet		**map;		/* SMR protected */
	struct rwlock		  lock;
	unsigned char		 *usedidx;	/* bitmap of indices in use */
};

/*
 * Destructor context for retiring an old map via smr_call().  This is
 * carved out of the old usedidx allocation, so usedidx allocations are
 * never smaller than this structure (see if_idxmap_usedidx_size()).
 */
struct if_idxmap_dtor {
	struct smr_entry	  smr;
	struct ifnet		**map;
};
223 
224 void	if_idxmap_init(unsigned int);
225 void	if_idxmap_free(void *);
226 void	if_idxmap_alloc(struct ifnet *);
227 void	if_idxmap_insert(struct ifnet *);
228 void	if_idxmap_remove(struct ifnet *);
229 
TAILQ_HEAD(, ifg_group) ifg_head =
    TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */

LIST_HEAD(, if_clone) if_cloners =
    LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
int if_cloners_count;	/* [I] number of clonable interfaces */

struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
struct rwlock if_tmplist_lock = RWLOCK_INITIALIZER("iftmplk");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

int	ifq_congestion;

int		 netisr;	/* pending NETISR_* bits, see if_netisr() */

/* a named taskq serving the legacy softnet processing */
struct softnet {
	char		 sn_name[16];
	struct taskq	*sn_taskq;
};

#define	NET_TASKQ	4	/* number of softnet taskqs */
struct softnet	softnets[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
263 
264 /*
265  * Network interface utility routines.
266  */
267 void
ifinit(void)268 ifinit(void)
269 {
270 	unsigned int	i;
271 
272 	/*
273 	 * most machines boot with 4 or 5 interfaces, so size the initial map
274 	 * to accommodate this
275 	 */
276 	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
277 
278 	for (i = 0; i < NET_TASKQ; i++) {
279 		struct softnet *sn = &softnets[i];
280 		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
281 		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
282 		    TASKQ_MPSAFE);
283 		if (sn->sn_taskq == NULL)
284 			panic("unable to create network taskq %d", i);
285 	}
286 }
287 
288 static struct if_idxmap if_idxmap;
289 
290 /*
291  * XXXSMP: For `ifnetlist' modification both kernel and net locks
292  * should be taken. For read-only access only one lock of them required.
293  */
294 struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
295 
static inline unsigned int
if_idxmap_limit(struct ifnet **if_map)
{
	/* slot 0 of the map encodes the array length, not an interface */
	uintptr_t limit = (uintptr_t)if_map[0];

	return (limit);
}
301 
302 static inline size_t
if_idxmap_usedidx_size(unsigned int limit)303 if_idxmap_usedidx_size(unsigned int limit)
304 {
305 	return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
306 }
307 
/*
 * Set up the interface index map with room for `limit' entries.
 * Called once at boot from ifinit().
 */
void
if_idxmap_init(unsigned int limit)
{
	struct ifnet **if_map;

	rw_init(&if_idxmap.lock, "idxmaplk");
	if_idxmap.serial = 1; /* skip ifidx 0 */

	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
	    M_WAITOK | M_ZERO);

	/* slot 0 stores the array length rather than an interface pointer */
	if_map[0] = (struct ifnet *)(uintptr_t)limit;

	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
	    M_IFADDR, M_WAITOK | M_ZERO);
	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */

	/* this is called early so there's nothing to race with */
	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
}
328 
/*
 * Reserve a free interface index for ifp and initialise its refcnt.
 * Grows the map (doubling it) when the serial counter runs past the
 * current limit; the old map is retired through SMR so lockless
 * readers can keep using it until they are done.
 */
void
if_idxmap_alloc(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int limit;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	rw_enter_write(&if_idxmap.lock);

	if (++if_idxmap.count >= USHRT_MAX)
		panic("too many interfaces");

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
	limit = if_idxmap_limit(if_map);

	/* indices are 16 bit, so wrap the serial at USHRT_MAX */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= limit) {
		struct if_idxmap_dtor *dtor;
		struct ifnet **oif_map;
		unsigned int olimit;
		unsigned char *nusedidx;

		oif_map = if_map;
		olimit = limit;

		limit = olimit * 2;
		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
		    M_WAITOK | M_ZERO);
		if_map[0] = (struct ifnet *)(uintptr_t)limit;

		/* copy the live entries, taking a reference for the new map */
		for (i = 1; i < olimit; i++) {
			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
			if (oifp == NULL)
				continue;

			/*
			 * nif_map isn't visible yet, so don't need
			 * SMR_PTR_SET_LOCKED and its membar.
			 */
			if_map[i] = if_ref(oifp);
		}

		nusedidx = malloc(if_idxmap_usedidx_size(limit),
		    M_IFADDR, M_WAITOK | M_ZERO);
		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));

		/* use the old usedidx bitmap as an smr_entry for the if_map */
		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
		if_idxmap.usedidx = nusedidx;

		/* publish the new map before scheduling the old for free */
		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);

		dtor->map = oif_map;
		smr_init(&dtor->smr);
		smr_call(&dtor->smr, if_idxmap_free, dtor);
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && isclr(if_idxmap.usedidx, index))
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}
	KASSERT(index != 0 && index < limit);
	KASSERT(isclr(if_idxmap.usedidx, index));

	setbit(if_idxmap.usedidx, index);
	ifp->if_index = index;

	rw_exit_write(&if_idxmap.lock);
}
404 
405 void
if_idxmap_free(void * arg)406 if_idxmap_free(void *arg)
407 {
408 	struct if_idxmap_dtor *dtor = arg;
409 	struct ifnet **oif_map = dtor->map;
410 	unsigned int olimit = if_idxmap_limit(oif_map);
411 	unsigned int i;
412 
413 	for (i = 1; i < olimit; i++)
414 		if_put(oif_map[i]);
415 
416 	free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
417 	free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
418 }
419 
/*
 * Publish ifp in the map slot reserved earlier by if_idxmap_alloc(),
 * making it visible to if_get().  A reference is taken on behalf of
 * the map.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
	    "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
	    if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
	KASSERT(isset(if_idxmap.usedidx, index));

	/* commit */
	SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));

	rw_exit_write(&if_idxmap.lock);
}
441 
/*
 * Remove ifp from the index map and release the index for reuse.
 * Waits for current SMR readers to drain before dropping the map's
 * reference on ifp.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
	KASSERT(isset(if_idxmap.usedidx, index));

	SMR_PTR_SET_LOCKED(&if_map[index], NULL);

	if_idxmap.count--;
	clrbit(if_idxmap.usedidx, index);
	/* end of if_idxmap modifications */

	rw_exit_write(&if_idxmap.lock);

	/* don't release the map's reference until readers can't see it */
	smr_barrier();
	if_put(ifp);
}
467 
468 /*
469  * Attach an interface to the
470  * list of "active" interfaces.
471  */
/*
 * Attach an interface to the
 * list of "active" interfaces.
 *
 * Finishes attachment after the interface is on ifnetlist: joins the
 * "all" group, hooks up per-AF and pf state, starts the watchdog
 * timeout, publishes the interface in the index map, and announces
 * its arrival on the routing socket.  Called with the net lock held.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	if_addgroup(ifp, IFG_ALL);

#ifdef INET6
	nd6_ifattach(ifp);
#endif

#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/* tasks reference the interface by index, not by pointer */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
503 
504 /*
505  * Allocate the link level name for the specified interface.  This
506  * is an attachment helper.  It must be called after ifp->if_addrlen
507  * is initialized, which may not be the case when if_attach() is
508  * called.
509  */
510 void
if_alloc_sadl(struct ifnet * ifp)511 if_alloc_sadl(struct ifnet *ifp)
512 {
513 	unsigned int socksize;
514 	int namelen, masklen;
515 	struct sockaddr_dl *sdl;
516 
517 	/*
518 	 * If the interface already has a link name, release it
519 	 * now.  This is useful for interfaces that can change
520 	 * link types, and thus switch link names often.
521 	 */
522 	if_free_sadl(ifp);
523 
524 	namelen = strlen(ifp->if_xname);
525 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
526 	socksize = masklen + ifp->if_addrlen;
527 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
528 	if (socksize < sizeof(*sdl))
529 		socksize = sizeof(*sdl);
530 	socksize = ROUNDUP(socksize);
531 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
532 	sdl->sdl_len = socksize;
533 	sdl->sdl_family = AF_LINK;
534 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
535 	sdl->sdl_nlen = namelen;
536 	sdl->sdl_alen = ifp->if_addrlen;
537 	sdl->sdl_index = ifp->if_index;
538 	sdl->sdl_type = ifp->if_type;
539 	ifp->if_sadl = sdl;
540 }
541 
542 /*
543  * Free the link level name for the specified interface.  This is
544  * a detach helper.  This is called from if_detach() or from
545  * link layer type specific detach functions.
546  */
547 void
if_free_sadl(struct ifnet * ifp)548 if_free_sadl(struct ifnet *ifp)
549 {
550 	if (ifp->if_sadl == NULL)
551 		return;
552 
553 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
554 	ifp->if_sadl = NULL;
555 }
556 
/* Like if_attach(), but insert at the head of ifnetlist (e.g. lo0). */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
566 
/* Attach ifp to the system, appending it to the tail of ifnetlist. */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
576 
577 void
if_attach_queues(struct ifnet * ifp,unsigned int nqs)578 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
579 {
580 	struct ifqueue **map;
581 	struct ifqueue *ifq;
582 	int i;
583 
584 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
585 	KASSERT(nqs != 0);
586 
587 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
588 
589 	ifp->if_snd.ifq_softc = NULL;
590 	map[0] = &ifp->if_snd;
591 
592 	for (i = 1; i < nqs; i++) {
593 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
594 		ifq_init_maxlen(ifq, ifp->if_snd.ifq_maxlen);
595 		ifq_init(ifq, ifp, i);
596 		map[i] = ifq;
597 	}
598 
599 	ifp->if_ifqs = map;
600 	ifp->if_nifqs = nqs;
601 }
602 
603 void
if_attach_iqueues(struct ifnet * ifp,unsigned int niqs)604 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
605 {
606 	struct ifiqueue **map;
607 	struct ifiqueue *ifiq;
608 	unsigned int i;
609 
610 	KASSERT(niqs != 0);
611 
612 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
613 
614 	ifp->if_rcv.ifiq_softc = NULL;
615 	map[0] = &ifp->if_rcv;
616 
617 	for (i = 1; i < niqs; i++) {
618 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
619 		ifiq_init(ifiq, ifp, i);
620 		map[i] = ifiq;
621 	}
622 
623 	ifp->if_iqs = map;
624 	ifp->if_niqs = niqs;
625 }
626 
/*
 * Driver-independent half of interface attachment: initialise lists,
 * queues, and hooks, allocate an interface index, and install default
 * method pointers where the driver left them NULL.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	/*
	 * Legacy (non-MPSAFE) drivers provide if_start; give them the
	 * compat shim as their qstart.  MPSAFE drivers must provide
	 * if_qstart and must not set if_start.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	if_idxmap_alloc(ifp);

	/* the builtin if_snd doubles as transmit queue 0 */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* the builtin if_rcv doubles as receive queue 0 */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
677 
/* Switch the packet scheduler (ifq_ops) used by the interface. */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
692 
/* Legacy transmit kick; only valid for non-MPSAFE (compat) drivers. */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy drivers expect the kernel lock and splnet protection */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
720 
721 int
if_enqueue(struct ifnet * ifp,struct mbuf * m)722 if_enqueue(struct ifnet *ifp, struct mbuf *m)
723 {
724 	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);
725 
726 #if NPF > 0
727 	if (m->m_pkthdr.pf.delay > 0)
728 		return (pf_delay_pkt(m, ifp->if_index));
729 #endif
730 
731 #if NBRIDGE > 0
732 	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
733 		int error;
734 
735 		error = bridge_enqueue(ifp, m);
736 		return (error);
737 	}
738 #endif
739 
740 #if NPF > 0
741 	pf_pkt_addr_changed(m);
742 #endif	/* NPF > 0 */
743 
744 	return ((*ifp->if_enqueue)(ifp, m));
745 }
746 
747 int
if_enqueue_ifq(struct ifnet * ifp,struct mbuf * m)748 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
749 {
750 	struct ifqueue *ifq = &ifp->if_snd;
751 	int error;
752 
753 	if (ifp->if_nifqs > 1) {
754 		unsigned int idx;
755 
756 		/*
757 		 * use the operations on the first ifq to pick which of
758 		 * the array gets this mbuf.
759 		 */
760 
761 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
762 		ifq = ifp->if_ifqs[idx];
763 	}
764 
765 	error = ifq_enqueue(ifq, m);
766 	if (error)
767 		return (error);
768 
769 	ifq_start(ifq);
770 
771 	return (0);
772 }
773 
/* Feed a list of received packets into the interface's first rx queue. */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
779 
/*
 * Loop a packet back into the local input path for address family af,
 * preserving the checksum-offload and TSO state so that checksums are
 * either trusted on input or recomputed if the packet is forwarded.
 * Consumes m; returns 0 or an errno.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags, keepcksum;
	uint16_t keepmss;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	/*
	 * Preserve outgoing checksum flags, in case the packet is
	 * forwarded to another interface.  Then the checksum, which
	 * is now incorrect, will be calculated before sending.
	 */
	keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
	    M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
	    M_TCP_TSO);
	keepmss = m->m_pkthdr.ph_mss;
	/* reset the header, then restore what must survive the loop */
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.csum_flags = keepcksum;
	m->m_pkthdr.ph_mss = keepmss;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* oversized TSO packets are only acceptable if LRO is supported */
	if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
		if (ifp->if_mtu > 0 &&
		    ((af == AF_INET &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
		    (af == AF_INET6 &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
			tcpstat_inc(tcps_inswlro);
			tcpstat_add(tcps_inpktlro,
			    (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
		} else {
			tcpstat_inc(tcps_inbadlro);
			m_freem(m);
			return (EPROTONOSUPPORT);
		}
	}

	/*
	 * the packet never went on the wire, so checksums the sender
	 * would have generated are already known to be good on input
	 */
	if (ISSET(keepcksum, M_TCP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_UDP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_ICMP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_IN_OK;

	/* do not count multicast loopback and simplex interfaces */
	if (ISSET(ifp->if_flags, IFF_LOOPBACK)) {
		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
		    m->m_pkthdr.len);
	}

	switch (af) {
	case AF_INET:
		if (ISSET(keepcksum, M_IPV4_CSUM_OUT))
			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
871 
872 int
if_output_ml(struct ifnet * ifp,struct mbuf_list * ml,struct sockaddr * dst,struct rtentry * rt)873 if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
874     struct sockaddr *dst, struct rtentry *rt)
875 {
876 	struct mbuf *m;
877 	int error = 0;
878 
879 	while ((m = ml_dequeue(ml)) != NULL) {
880 		error = ifp->if_output(ifp, m, dst, rt);
881 		if (error)
882 			break;
883 	}
884 	if (error)
885 		ml_purge(ml);
886 
887 	return error;
888 }
889 
/*
 * Output a TCP segment, using TSO when possible.  On return *mp is
 * NULL if the packet was sent (or freed on error); otherwise *mp
 * still holds a packet the caller must fragment or drop.
 */
int
if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
    struct rtentry *rt, u_int mtu)
{
	uint32_t ifcap;
	int error;

	switch (dst->sa_family) {
	case AF_INET:
		ifcap = IFCAP_TSOv4;
		break;
#ifdef INET6
	case AF_INET6:
		ifcap = IFCAP_TSOv6;
		break;
#endif
	default:
		unhandled_af(dst->sa_family);
	}

	/*
	 * Try to send with TSO first.  When forwarding LRO may set
	 * maximum segment size in mbuf header.  Chop TCP segment
	 * even if it would fit interface MTU to preserve maximum
	 * path MTU.
	 */
	error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
	if (error || *mp == NULL)
		return error;

	if ((*mp)->m_pkthdr.len <= mtu) {
		/* small enough to go out whole; finalise checksums */
		switch (dst->sa_family) {
		case AF_INET:
			in_hdr_cksum_out(*mp, ifp);
			in_proto_cksum_out(*mp, ifp);
			break;
#ifdef INET6
		case AF_INET6:
			in6_proto_cksum_out(*mp, ifp);
			break;
#endif
		}
		error = ifp->if_output(ifp, *mp, dst, rt);
		*mp = NULL;
		return error;
	}

	/* mp still contains mbuf that has to be fragmented or dropped. */
	return 0;
}
940 
/*
 * Drain a shared mbuf_queue through the interface, keeping the
 * caller's byte/packet accounting in *total consistent even if
 * packets are discarded.
 */
int
if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
    struct sockaddr *dst, struct rtentry *rt)
{
	struct mbuf_list ml;
	unsigned int len;
	int error;

	mq_delist(mq, &ml);
	len = ml_len(&ml);
	error = if_output_ml(ifp, &ml, dst, rt);

	/* XXXSMP we also discard if other CPU enqueues */
	if (mq_len(mq) > 0) {
		/* mbuf is back in queue. Discard. */
		atomic_sub_int(total, len + mq_purge(mq));
	} else
		atomic_sub_int(total, len);

	return error;
}
962 
963 int
if_output_local(struct ifnet * ifp,struct mbuf * m,sa_family_t af)964 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
965 {
966 	struct ifiqueue *ifiq;
967 	unsigned int flow = 0;
968 
969 	m->m_pkthdr.ph_family = af;
970 	m->m_pkthdr.ph_ifidx = ifp->if_index;
971 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
972 
973 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
974 		flow = m->m_pkthdr.ph_flowid;
975 
976 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
977 
978 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
979 }
980 
/*
 * Run the protocol input routine over each packet on the list.
 * Called from the softnet taskqs with no locks held; takes the shared
 * net lock around the protocol handlers.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* feed packet arrival entropy into the random pool */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the shared netlock for packet processing in the softnet
	 * threads.  Packets can regrab the exclusive lock via queues.
	 * ioctl, sysctl, and socket syscall may use shared lock if access is
	 * read only or MP safe.  Usually they hold the exclusive net lock.
	 */

	NET_LOCK_SHARED();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK_SHARED();
}
1004 
/*
 * Input path for pseudo (virtual) interfaces: account the packet,
 * run it past bpf, and call the protocol input directly instead of
 * going through a receive queue.  Consumes m.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means the bpf filter consumed the packet */
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	/* monitor-mode interfaces only feed bpf, never the stack */
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
		(*ifp->if_input)(ifp, m);
	else
		m_freem(m);
}
1037 
/*
 * Run the legacy protocol "soft interrupt" handlers for every bit
 * currently set in the global netisr word.  Handlers that are not yet
 * MP safe run under the kernel lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP))
			arpintr();
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX))
			pipexintr();
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		t |= n;
	}

	NET_UNLOCK();
}
1092 
/*
 * Run every task on a hook list with if_hooks_mtx dropped around each
 * callback.  A stack-allocated cursor (recognizable by t_func == NULL)
 * marks our position so the list may change while the mutex is not
 * held.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		/* Copy out func/arg so the task may be freed by its hook. */
		func = t->t_func;
		arg = t->t_arg;

		/* Keep our place, then drop the mutex for the callback. */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
1121 
/*
 * Make ``ifp'' unreachable: unlink it from the global interface list
 * and the index map, then sleep until every outstanding if_get()
 * reference has been released.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
1136 
/*
 * Undo pseudo-driver state by running the interface's detach hooks
 * under the exclusive net lock.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1150 
/*
 * Register a task to run when ``ifp'' is detached.  Inserted at the
 * head so hooks run in reverse registration order (see if_deactivate()).
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1158 
/*
 * Unregister a previously added detach hook.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1166 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	/* From now on starting the queue only frees the packets. */
	ifp->if_qstart = if_detached_qstart;

	/* Wait until the start routines finished. */
	ifq_barrier(&ifp->if_snd);
	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* Further ioctls get a harmless stub; stop watchdog processing. */
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	/* Tear down protocol state that references this interface. */
	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Leave every interface group. */
	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}
	splx(s);
	NET_UNLOCK();

	/* All hooks must have been removed by their owners by now. */
	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

#ifdef INET6
	nd6_ifdetach(ifp);
#endif

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/*
	 * Destroy the send queues; extra queues (beyond the builtin
	 * if_snd) were allocated separately and must be freed too.
	 */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* Same for the receive queues (builtin if_rcv vs. extras). */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1277 
1278 /*
1279  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1280  */
1281 int
if_isconnected(const struct ifnet * ifp0,unsigned int ifidx)1282 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1283 {
1284 	struct ifnet *ifp;
1285 	int connected = 0;
1286 
1287 	ifp = if_get(ifidx);
1288 	if (ifp == NULL)
1289 		return (0);
1290 
1291 	if (ifp0->if_index == ifp->if_index)
1292 		connected = 1;
1293 
1294 #if NBRIDGE > 0
1295 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1296 		connected = 1;
1297 #endif
1298 #if NCARP > 0
1299 	if ((ifp0->if_type == IFT_CARP &&
1300 	    ifp0->if_carpdevidx == ifp->if_index) ||
1301 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1302 		connected = 1;
1303 #endif
1304 
1305 	if_put(ifp);
1306 	return (connected);
1307 }
1308 
/*
 * Create a clone network interface.  ``name'' is the full interface
 * name including the unit number; a non-zero ``rdomain'' moves the new
 * interface into that routing domain.  Returns 0 or an errno value.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	/* Refuse to create an interface that already exists. */
	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* On failure ifp stays NULL; if_put() below copes with that. */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	if_put(ifp);

	return (ret);
}
1346 
/*
 * Destroy a clone network interface.  Returns 0 on success, EINVAL for
 * an unknown cloner, EOPNOTSUPP if the cloner cannot destroy, or ENXIO
 * if no interface of that name exists.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	/* Look the interface up by name on the global list. */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Bring the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1389 
/*
 * Look up a network interface cloner by a full interface name such as
 * "vlan0".  On success the cloner is returned and, if ``unitp'' is not
 * NULL, the parsed unit number is stored there.  Returns NULL when the
 * name has no unit, the unit is zero-padded (e.g. "vlan00"), the unit
 * overflows an int, or no cloner matches the name prefix.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Match the non-numeric prefix against the registered cloners. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* Parse the unit number, rejecting non-digits and int overflow. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1435 
/*
 * Register a network interface cloner.  Only legal during autoconf;
 * the cloner list is immutable once the system is up.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1452 
/*
 * Provide list of interface cloners to userspace.  With a NULL buffer
 * only the total count is reported; otherwise up to ifcr_count names
 * of IFNAMSIZ bytes each are copied out.
 */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		ifcr->ifcr_total = if_cloners_count;
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	ifcr->ifcr_total = if_cloners_count;
	count = MIN(if_cloners_count, ifcr->ifcr_count);

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		/* Zero-fill so no kernel stack bytes leak to userland. */
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}
1489 
/*
 * set queue congestion marker to the current tick count; consulted by
 * if_congested() below.
 */
void
if_congestion(void)
{
	extern int ticks;

	ifq_congestion = ticks;
}
1500 
1501 int
if_congested(void)1502 if_congested(void)
1503 {
1504 	extern int ticks;
1505 	int diff;
1506 
1507 	diff = ticks - ifq_congestion;
1508 	if (diff < 0) {
1509 		ifq_congestion = ticks - hz;
1510 		return (0);
1511 	}
1512 
1513 	return (diff <= (hz / 100));
1514 }
1515 
/*
 * Compare two sockaddrs for exact equality.  The compared length is
 * taken from the first argument's sa_len, so the len/family header and
 * all address bytes must match.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1519 
1520 /*
1521  * Locate an interface based on a complete address.
1522  */
1523 struct ifaddr *
ifa_ifwithaddr(const struct sockaddr * addr,u_int rtableid)1524 ifa_ifwithaddr(const struct sockaddr *addr, u_int rtableid)
1525 {
1526 	struct ifnet *ifp;
1527 	struct ifaddr *ifa;
1528 	u_int rdomain;
1529 
1530 	NET_ASSERT_LOCKED();
1531 
1532 	rdomain = rtable_l2(rtableid);
1533 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1534 		if (ifp->if_rdomain != rdomain)
1535 			continue;
1536 
1537 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1538 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1539 				continue;
1540 
1541 			if (equal(addr, ifa->ifa_addr)) {
1542 				return (ifa);
1543 			}
1544 		}
1545 	}
1546 	return (NULL);
1547 }
1548 
/*
 * Locate the point to point interface with a given destination address
 * within the routing domain derived from ``rdomain''.  Returns NULL if
 * no point-to-point interface has a matching dstaddr.
 */
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rdomain);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr)) {
					return (ifa);
				}
			}
		}
	}
	return (NULL);
}
1577 
/*
 * Find an interface address specific to an interface best matching
 * a given address.  Falls back to the first address of the right
 * family (``ifa_maybe'') when no exact or netmask match is found.
 */
struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	const char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		/* Without a netmask (or on p2p links) only exact matches. */
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Byte-wise compare addr against ifa_addr under the
		 * netmask; a full walk without mismatch is a match.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1616 
/*
 * No-op if_rtrequest handler for interfaces that need no special
 * per-route bookkeeping.
 */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1621 
/*
 * Default action when installing a local route on a point-to-point
 * interface.  Only RTM_ADD of an RTF_LOCAL route is acted upon: if the
 * loopback interface of the routing domain has an address of the same
 * family, the RTF_LLINFO flag is cleared from the new route.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the local route points at. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* Look for a same-family address on the rdomain's lo(4). */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1668 
/*
 * bpf(4) tap for point-to-point interfaces: capture with the packet's
 * address family (ph_family) prepended.  The bpf_mtap_af() result is
 * passed through to the caller (non-zero requests a drop, see
 * if_vinput()); without bpf support this is always 0.
 */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	return (0);
#endif
}
1678 
1679 void
p2p_input(struct ifnet * ifp,struct mbuf * m)1680 p2p_input(struct ifnet *ifp, struct mbuf *m)
1681 {
1682 	void (*input)(struct ifnet *, struct mbuf *);
1683 
1684 	switch (m->m_pkthdr.ph_family) {
1685 	case AF_INET:
1686 		input = ipv4_input;
1687 		break;
1688 #ifdef INET6
1689 	case AF_INET6:
1690 		input = ipv6_input;
1691 		break;
1692 #endif
1693 #ifdef MPLS
1694 	case AF_MPLS:
1695 		input = mpls_input;
1696 		break;
1697 #endif
1698 	default:
1699 		m_freem(m);
1700 		return;
1701 	}
1702 
1703 	(*input)(ifp, m);
1704 }
1705 
/*
 * Bring down all interfaces that are up, notifying each driver via
 * SIOCSIFFLAGS with the cleared flags.
 */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1725 
/*
 * Mark an interface down and notify protocols of
 * the transition.  Pending output is purged.  Caller holds the net
 * lock (asserted below).
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1741 
/*
 * Mark an interface up and notify protocols of
 * the transition.  Caller holds the net lock (asserted below).
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1762 
/*
 * Notify userland, the routing table and hooks owner of
 * a link-state transition.  Task callback; the interface index is
 * smuggled in through the argument pointer's integer value.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	NET_LOCK();
	KERNEL_LOCK();

	/* The interface may have been detached since task_add(). */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	KERNEL_UNLOCK();
	NET_UNLOCK();
}
1784 
/*
 * Propagate a link-state change: inform routing socket listeners,
 * update the routing table, then run the link-state hooks.  The
 * notifications are skipped while panicking.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	if (panicstr == NULL) {
		rtm_ifchg(ifp);
		rt_if_track(ifp);
	}

	if_hooks_run(&ifp->if_linkstatehooks);
}
1797 
/*
 * Register a task to run on link-state changes of ``ifp''.  Inserted
 * at the head, so hooks run in reverse registration order.
 */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1805 
/*
 * Unregister a previously added link-state hook.
 */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1813 
/*
 * Schedule a link state change task on the interface's net taskq.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1822 
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Fire the watchdog task when the timer hits zero. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm only while a watchdog handler is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1841 
/*
 * Task callback invoking an interface's watchdog handler.  The
 * interface index arrives encoded in the argument pointer; the
 * interface may already be gone, in which case this is a no-op.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	/* Watchdog handlers still expect the big lock. */
	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1862 
/*
 * Map interface name to interface structure pointer.  On success a
 * reference is taken; the caller must release it with if_put().
 */
struct ifnet *
if_unit(const char *name)
{
	struct ifnet *ifp;

	KERNEL_ASSERT_LOCKED();

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0) {
			if_ref(ifp);
			return (ifp);
		}
	}

	return (NULL);
}
1882 
/*
 * Map interface index to interface structure pointer.  The lookup runs
 * inside an SMR read section; on success a reference is taken and the
 * caller must release it with if_put().  Index 0 is never valid.
 */
struct ifnet *
if_get(unsigned int index)
{
	struct ifnet **if_map;
	struct ifnet *ifp = NULL;

	if (index == 0)
		return (NULL);

	smr_read_enter();
	if_map = SMR_PTR_GET(&if_idxmap.map);
	if (index < if_idxmap_limit(if_map)) {
		ifp = SMR_PTR_GET(&if_map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Reference must be taken before leaving the SMR
			 * section, otherwise the ifp could be freed. */
			if_ref(ifp);
		}
	}
	smr_read_leave();

	return (ifp);
}
1908 
/*
 * Take an additional reference on ``ifp''; returned for call chaining.
 */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1916 
/*
 * Release a reference obtained with if_get()/if_unit()/if_ref().
 * NULL is accepted, so failed lookups need no special casing.
 */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1925 
/*
 * Set the link-layer (MAC) address of ``ifp'' to the ETHER_ADDR_LEN
 * bytes at ``lladdr''.  NOTE(review): the cast to struct arpcom
 * assumes an Ethernet-like interface; callers gate on if_type.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1937 
/*
 * Create routing domain ``rdomain'' together with its loopback
 * interface "lo<rdomain>".  Returns 0 on success, EEXIST when the
 * freshly added table is not empty, or another errno value.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when ``ifp'' itself is that loopback. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Make the loopback the domain's default and move it in. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1967 
/*
 * Move ``ifp'' into routing domain ``rdomain''.  The table must exist
 * and be a real rdomain (its own l2 domain).  A domain's designated
 * loopback interface may not be moved out (EPERM).  Address and route
 * state tied to the old domain is torn down; if the interface was up
 * it is cycled down and back up so the state is recreated.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
2031 
2032 /*
2033  * Interface ioctls.
2034  */
2035 int
ifioctl(struct socket * so,u_long cmd,caddr_t data,struct proc * p)2036 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
2037 {
2038 	struct ifnet *ifp;
2039 	struct ifreq *ifr = (struct ifreq *)data;
2040 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
2041 	struct if_afreq *ifar = (struct if_afreq *)data;
2042 	char ifdescrbuf[IFDESCRSIZE];
2043 	char ifrtlabelbuf[RTLABEL_LEN];
2044 	int s, error = 0, oif_xflags;
2045 	size_t bytesdone;
2046 	unsigned short oif_flags;
2047 
2048 	switch (cmd) {
2049 	case SIOCIFCREATE:
2050 		if ((error = suser(p)) != 0)
2051 			return (error);
2052 		KERNEL_LOCK();
2053 		error = if_clone_create(ifr->ifr_name, 0);
2054 		KERNEL_UNLOCK();
2055 		return (error);
2056 	case SIOCIFDESTROY:
2057 		if ((error = suser(p)) != 0)
2058 			return (error);
2059 		KERNEL_LOCK();
2060 		error = if_clone_destroy(ifr->ifr_name);
2061 		KERNEL_UNLOCK();
2062 		return (error);
2063 	case SIOCSIFGATTR:
2064 		if ((error = suser(p)) != 0)
2065 			return (error);
2066 		KERNEL_LOCK();
2067 		NET_LOCK();
2068 		error = if_setgroupattribs(data);
2069 		NET_UNLOCK();
2070 		KERNEL_UNLOCK();
2071 		return (error);
2072 	case SIOCGIFCONF:
2073 	case SIOCIFGCLONERS:
2074 	case SIOCGIFGMEMB:
2075 	case SIOCGIFGATTR:
2076 	case SIOCGIFGLIST:
2077 	case SIOCGIFFLAGS:
2078 	case SIOCGIFXFLAGS:
2079 	case SIOCGIFMETRIC:
2080 	case SIOCGIFMTU:
2081 	case SIOCGIFHARDMTU:
2082 	case SIOCGIFDATA:
2083 	case SIOCGIFDESCR:
2084 	case SIOCGIFRTLABEL:
2085 	case SIOCGIFPRIORITY:
2086 	case SIOCGIFRDOMAIN:
2087 	case SIOCGIFGROUP:
2088 	case SIOCGIFLLPRIO:
2089 		error = ifioctl_get(cmd, data);
2090 		return (error);
2091 	}
2092 
2093 	KERNEL_LOCK();
2094 
2095 	ifp = if_unit(ifr->ifr_name);
2096 	if (ifp == NULL) {
2097 		KERNEL_UNLOCK();
2098 		return (ENXIO);
2099 	}
2100 	oif_flags = ifp->if_flags;
2101 	oif_xflags = ifp->if_xflags;
2102 
2103 	switch (cmd) {
2104 	case SIOCIFAFATTACH:
2105 	case SIOCIFAFDETACH:
2106 		if ((error = suser(p)) != 0)
2107 			break;
2108 		NET_LOCK();
2109 		switch (ifar->ifar_af) {
2110 		case AF_INET:
2111 			/* attach is a noop for AF_INET */
2112 			if (cmd == SIOCIFAFDETACH)
2113 				in_ifdetach(ifp);
2114 			break;
2115 #ifdef INET6
2116 		case AF_INET6:
2117 			if (cmd == SIOCIFAFATTACH)
2118 				error = in6_ifattach(ifp);
2119 			else
2120 				in6_ifdetach(ifp);
2121 			break;
2122 #endif /* INET6 */
2123 		default:
2124 			error = EAFNOSUPPORT;
2125 		}
2126 		NET_UNLOCK();
2127 		break;
2128 
2129 	case SIOCSIFXFLAGS:
2130 		if ((error = suser(p)) != 0)
2131 			break;
2132 
2133 		NET_LOCK();
2134 #ifdef INET6
2135 		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
2136 		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
2137 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
2138 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
2139 			error = in6_ifattach(ifp);
2140 			if (error != 0) {
2141 				NET_UNLOCK();
2142 				break;
2143 			}
2144 		}
2145 
2146 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2147 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2148 			ifp->if_xflags |= IFXF_INET6_NOSOII;
2149 
2150 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2151 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2152 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
2153 
2154 #endif	/* INET6 */
2155 
2156 #ifdef MPLS
2157 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
2158 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
2159 			s = splnet();
2160 			ifp->if_xflags |= IFXF_MPLS;
2161 			ifp->if_ll_output = ifp->if_output;
2162 			ifp->if_output = mpls_output;
2163 			splx(s);
2164 		}
2165 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
2166 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
2167 			s = splnet();
2168 			ifp->if_xflags &= ~IFXF_MPLS;
2169 			ifp->if_output = ifp->if_ll_output;
2170 			ifp->if_ll_output = NULL;
2171 			splx(s);
2172 		}
2173 #endif	/* MPLS */
2174 
2175 #ifndef SMALL_KERNEL
2176 		if (ifp->if_capabilities & IFCAP_WOL) {
2177 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
2178 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
2179 				s = splnet();
2180 				ifp->if_xflags |= IFXF_WOL;
2181 				error = ifp->if_wol(ifp, 1);
2182 				splx(s);
2183 			}
2184 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
2185 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2186 				s = splnet();
2187 				ifp->if_xflags &= ~IFXF_WOL;
2188 				error = ifp->if_wol(ifp, 0);
2189 				splx(s);
2190 			}
2191 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2192 			ifr->ifr_flags &= ~IFXF_WOL;
2193 			error = ENOTSUP;
2194 		}
2195 #endif
2196 		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
2197 		    ISSET(ifp->if_xflags, IFXF_LRO))
2198 			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));
2199 
2200 		if (error == 0)
2201 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2202 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2203 
2204 		if (!ISSET(ifp->if_flags, IFF_UP) &&
2205 		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
2206 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
2207 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
2208 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
2209 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
2210 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
2211 			ifr->ifr_flags = ifp->if_flags | IFF_UP;
2212 			goto forceup;
2213 		}
2214 
2215 		NET_UNLOCK();
2216 		break;
2217 
2218 	case SIOCSIFFLAGS:
2219 		if ((error = suser(p)) != 0)
2220 			break;
2221 
2222 		NET_LOCK();
2223 forceup:
2224 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2225 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
2226 		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
2227 		if (error != 0) {
2228 			ifp->if_flags = oif_flags;
2229 			if (cmd == SIOCSIFXFLAGS)
2230 				ifp->if_xflags = oif_xflags;
2231 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
2232 			s = splnet();
2233 			if (ISSET(ifp->if_flags, IFF_UP))
2234 				if_up(ifp);
2235 			else
2236 				if_down(ifp);
2237 			splx(s);
2238 		}
2239 		NET_UNLOCK();
2240 		break;
2241 
2242 	case SIOCSIFMETRIC:
2243 		if ((error = suser(p)) != 0)
2244 			break;
2245 		NET_LOCK();
2246 		ifp->if_metric = ifr->ifr_metric;
2247 		NET_UNLOCK();
2248 		break;
2249 
2250 	case SIOCSIFMTU:
2251 		if ((error = suser(p)) != 0)
2252 			break;
2253 		NET_LOCK();
2254 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2255 		NET_UNLOCK();
2256 		if (error == 0)
2257 			rtm_ifchg(ifp);
2258 		break;
2259 
2260 	case SIOCSIFDESCR:
2261 		if ((error = suser(p)) != 0)
2262 			break;
2263 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2264 		    IFDESCRSIZE, &bytesdone);
2265 		if (error == 0) {
2266 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2267 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2268 		}
2269 		break;
2270 
2271 	case SIOCSIFRTLABEL:
2272 		if ((error = suser(p)) != 0)
2273 			break;
2274 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2275 		    RTLABEL_LEN, &bytesdone);
2276 		if (error == 0) {
2277 			rtlabel_unref(ifp->if_rtlabelid);
2278 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2279 		}
2280 		break;
2281 
2282 	case SIOCSIFPRIORITY:
2283 		if ((error = suser(p)) != 0)
2284 			break;
2285 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2286 			error = EINVAL;
2287 			break;
2288 		}
2289 		ifp->if_priority = ifr->ifr_metric;
2290 		break;
2291 
2292 	case SIOCSIFRDOMAIN:
2293 		if ((error = suser(p)) != 0)
2294 			break;
2295 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2296 		if (!error || error == EEXIST) {
2297 			NET_LOCK();
2298 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2299 			NET_UNLOCK();
2300 		}
2301 		break;
2302 
2303 	case SIOCAIFGROUP:
2304 		if ((error = suser(p)))
2305 			break;
2306 		NET_LOCK();
2307 		error = if_addgroup(ifp, ifgr->ifgr_group);
2308 		if (error == 0) {
2309 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2310 			if (error == ENOTTY)
2311 				error = 0;
2312 		}
2313 		NET_UNLOCK();
2314 		break;
2315 
2316 	case SIOCDIFGROUP:
2317 		if ((error = suser(p)))
2318 			break;
2319 		NET_LOCK();
2320 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2321 		if (error == ENOTTY)
2322 			error = 0;
2323 		if (error == 0)
2324 			error = if_delgroup(ifp, ifgr->ifgr_group);
2325 		NET_UNLOCK();
2326 		break;
2327 
2328 	case SIOCSIFLLADDR:
2329 		if ((error = suser(p)))
2330 			break;
2331 		if ((ifp->if_sadl == NULL) ||
2332 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2333 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2334 			error = EINVAL;
2335 			break;
2336 		}
2337 		NET_LOCK();
2338 		switch (ifp->if_type) {
2339 		case IFT_ETHER:
2340 		case IFT_CARP:
2341 		case IFT_XETHER:
2342 		case IFT_ISO88025:
2343 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2344 			if (error == ENOTTY)
2345 				error = 0;
2346 			if (error == 0)
2347 				error = if_setlladdr(ifp,
2348 				    ifr->ifr_addr.sa_data);
2349 			break;
2350 		default:
2351 			error = ENODEV;
2352 		}
2353 
2354 		if (error == 0)
2355 			ifnewlladdr(ifp);
2356 		NET_UNLOCK();
2357 		if (error == 0)
2358 			rtm_ifchg(ifp);
2359 		break;
2360 
2361 	case SIOCSIFLLPRIO:
2362 		if ((error = suser(p)))
2363 			break;
2364 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2365 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2366 			error = EINVAL;
2367 			break;
2368 		}
2369 		NET_LOCK();
2370 		ifp->if_llprio = ifr->ifr_llprio;
2371 		NET_UNLOCK();
2372 		break;
2373 
2374 	case SIOCGIFSFFPAGE:
2375 		error = suser(p);
2376 		if (error != 0)
2377 			break;
2378 
2379 		error = if_sffpage_check(data);
2380 		if (error != 0)
2381 			break;
2382 
2383 		/* don't take NET_LOCK because i2c reads take a long time */
2384 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2385 		break;
2386 
2387 	case SIOCSIFMEDIA:
2388 		if ((error = suser(p)) != 0)
2389 			break;
2390 		/* FALLTHROUGH */
2391 	case SIOCGIFMEDIA:
2392 		/* net lock is not needed */
2393 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2394 		break;
2395 
2396 	case SIOCSETKALIVE:
2397 	case SIOCDIFPHYADDR:
2398 	case SIOCSLIFPHYADDR:
2399 	case SIOCSLIFPHYRTABLE:
2400 	case SIOCSLIFPHYTTL:
2401 	case SIOCSLIFPHYDF:
2402 	case SIOCSLIFPHYECN:
2403 	case SIOCADDMULTI:
2404 	case SIOCDELMULTI:
2405 	case SIOCSVNETID:
2406 	case SIOCDVNETID:
2407 	case SIOCSVNETFLOWID:
2408 	case SIOCSTXHPRIO:
2409 	case SIOCSRXHPRIO:
2410 	case SIOCSIFPAIR:
2411 	case SIOCSIFPARENT:
2412 	case SIOCDIFPARENT:
2413 	case SIOCSETMPWCFG:
2414 	case SIOCSETLABEL:
2415 	case SIOCDELLABEL:
2416 	case SIOCSPWE3CTRLWORD:
2417 	case SIOCSPWE3FAT:
2418 	case SIOCSPWE3NEIGHBOR:
2419 	case SIOCDPWE3NEIGHBOR:
2420 #if NBRIDGE > 0
2421 	case SIOCBRDGADD:
2422 	case SIOCBRDGDEL:
2423 	case SIOCBRDGSIFFLGS:
2424 	case SIOCBRDGSCACHE:
2425 	case SIOCBRDGADDS:
2426 	case SIOCBRDGDELS:
2427 	case SIOCBRDGSADDR:
2428 	case SIOCBRDGSTO:
2429 	case SIOCBRDGDADDR:
2430 	case SIOCBRDGFLUSH:
2431 	case SIOCBRDGADDL:
2432 	case SIOCBRDGSIFPROT:
2433 	case SIOCBRDGARL:
2434 	case SIOCBRDGFRL:
2435 	case SIOCBRDGSPRI:
2436 	case SIOCBRDGSHT:
2437 	case SIOCBRDGSFD:
2438 	case SIOCBRDGSMA:
2439 	case SIOCBRDGSIFPRIO:
2440 	case SIOCBRDGSIFCOST:
2441 	case SIOCBRDGSTXHC:
2442 	case SIOCBRDGSPROTO:
2443 #endif
2444 		if ((error = suser(p)) != 0)
2445 			break;
2446 		/* FALLTHROUGH */
2447 	default:
2448 		error = pru_control(so, cmd, data, ifp);
2449 		if (error != EOPNOTSUPP)
2450 			break;
2451 		switch (cmd) {
2452 		case SIOCAIFADDR:
2453 		case SIOCDIFADDR:
2454 		case SIOCSIFADDR:
2455 		case SIOCSIFNETMASK:
2456 		case SIOCSIFDSTADDR:
2457 		case SIOCSIFBRDADDR:
2458 #ifdef INET6
2459 		case SIOCAIFADDR_IN6:
2460 		case SIOCDIFADDR_IN6:
2461 #endif
2462 			error = suser(p);
2463 			break;
2464 		default:
2465 			error = 0;
2466 			break;
2467 		}
2468 		if (error)
2469 			break;
2470 		NET_LOCK();
2471 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2472 		NET_UNLOCK();
2473 		break;
2474 	}
2475 
2476 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
2477 		/* if_up() and if_down() already sent an update, skip here */
2478 		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
2479 			rtm_ifchg(ifp);
2480 	}
2481 
2482 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2483 		getmicrotime(&ifp->if_lastchange);
2484 
2485 	KERNEL_UNLOCK();
2486 
2487 	if_put(ifp);
2488 
2489 	return (error);
2490 }
2491 
/*
 * Handle read-only interface ioctls (SIOCG*).  Global requests that are
 * not tied to a single interface are dispatched first; the remaining
 * commands look the interface up by name and copy the requested
 * attribute out through the caller-supplied ifreq.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;
	size_t bytesdone;

	/* Requests that do not reference a specific interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		error = if_getgroupmembers(data);
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		error = if_getgrouplist(data);
		return (error);
	}

	KERNEL_LOCK();
	ifp = if_unit(ifr->ifr_name);
	KERNEL_UNLOCK();

	if (ifp == NULL)
		return (ENXIO);

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Kernel-internal bits are not exposed to userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;

		/* Snapshot the stats under the locks; copyout afterwards. */
		NET_LOCK_SHARED();
		KERNEL_LOCK();
		if_getdata(ifp, &ifdata);
		KERNEL_UNLOCK();
		NET_UNLOCK_SHARED();

		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR: {
		char ifdescrbuf[IFDESCRSIZE];
		/* if_description is read under the kernel lock. */
		KERNEL_LOCK();
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		KERNEL_UNLOCK();

		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;
	}
	case SIOCGIFRTLABEL: {
		char ifrtlabelbuf[RTLABEL_LEN];
		u_short rtlabelid = READ_ONCE(ifp->if_rtlabelid);

		if (rtlabelid && rtlabel_id2name(rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;
	}
	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* The dispatcher only forwards the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	if_put(ifp);

	return (error);
}
2611 
2612 static int
if_sffpage_check(const caddr_t data)2613 if_sffpage_check(const caddr_t data)
2614 {
2615 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2616 
2617 	switch (sff->sff_addr) {
2618 	case IFSFF_ADDR_EEPROM:
2619 	case IFSFF_ADDR_DDM:
2620 		break;
2621 	default:
2622 		return (EINVAL);
2623 	}
2624 
2625 	return (0);
2626 }
2627 
2628 int
if_txhprio_l2_check(int hdrprio)2629 if_txhprio_l2_check(int hdrprio)
2630 {
2631 	switch (hdrprio) {
2632 	case IF_HDRPRIO_PACKET:
2633 		return (0);
2634 	default:
2635 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2636 			return (0);
2637 		break;
2638 	}
2639 
2640 	return (EINVAL);
2641 }
2642 
2643 int
if_txhprio_l3_check(int hdrprio)2644 if_txhprio_l3_check(int hdrprio)
2645 {
2646 	switch (hdrprio) {
2647 	case IF_HDRPRIO_PACKET:
2648 	case IF_HDRPRIO_PAYLOAD:
2649 		return (0);
2650 	default:
2651 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2652 			return (0);
2653 		break;
2654 	}
2655 
2656 	return (EINVAL);
2657 }
2658 
2659 int
if_rxhprio_l2_check(int hdrprio)2660 if_rxhprio_l2_check(int hdrprio)
2661 {
2662 	switch (hdrprio) {
2663 	case IF_HDRPRIO_PACKET:
2664 	case IF_HDRPRIO_OUTER:
2665 		return (0);
2666 	default:
2667 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2668 			return (0);
2669 		break;
2670 	}
2671 
2672 	return (EINVAL);
2673 }
2674 
2675 int
if_rxhprio_l3_check(int hdrprio)2676 if_rxhprio_l3_check(int hdrprio)
2677 {
2678 	switch (hdrprio) {
2679 	case IF_HDRPRIO_PACKET:
2680 	case IF_HDRPRIO_PAYLOAD:
2681 	case IF_HDRPRIO_OUTER:
2682 		return (0);
2683 	default:
2684 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2685 			return (0);
2686 		break;
2687 	}
2688 
2689 	return (EINVAL);
2690 }
2691 
2692 /*
2693  * Return interface configuration
2694  * of system.  List may be used
2695  * in later ioctl's (above) to get
2696  * other information.
2697  */
/*
 * SIOCGIFCONF: copy the name and primary addresses of every interface
 * into the user buffer described by ifc.  A zero ifc_len is a size
 * probe; otherwise entries are written until the buffer runs out.
 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* oversized sockaddrs need extra room */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* addressless interface: emit one zeroed entry */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * Variable-length sockaddr: copy the
					 * name and the sockaddr separately,
					 * then step ifrp past the extra bytes
					 * so the next entry follows it.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* tell the caller how many bytes were actually used */
	ifc->ifc_len -= space;
	return (error);
}
2773 
/*
 * Allocate the per-CPU counter set for an interface.  Must be called
 * exactly once, before any counters have been attached.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2781 
/*
 * Release the per-CPU counter set of an interface and clear the
 * pointer so a stale reference cannot be used afterwards.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2790 
/*
 * Fill *data with a snapshot of the interface's attributes and traffic
 * statistics: the legacy if_data_counters, the optional per-CPU
 * counters, and the per-queue counters are all summed together.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	data->ifi_type = ifp->if_type;
	data->ifi_addrlen = ifp->if_addrlen;
	data->ifi_hdrlen = ifp->if_hdrlen;
	data->ifi_link_state = ifp->if_link_state;
	data->ifi_mtu = ifp->if_mtu;
	data->ifi_metric = ifp->if_metric;
	data->ifi_baudrate = ifp->if_baudrate;
	data->ifi_capabilities = ifp->if_capabilities;
	data->ifi_rdomain = ifp->if_rdomain;
	data->ifi_lastchange = ifp->if_lastchange;

	/* legacy counters kept directly in the ifnet */
	data->ifi_ipackets = ifp->if_data_counters[ifc_ipackets];
	data->ifi_ierrors = ifp->if_data_counters[ifc_ierrors];
	data->ifi_opackets = ifp->if_data_counters[ifc_opackets];
	data->ifi_oerrors = ifp->if_data_counters[ifc_oerrors];
	data->ifi_collisions = ifp->if_data_counters[ifc_collisions];
	data->ifi_ibytes = ifp->if_data_counters[ifc_ibytes];
	data->ifi_obytes = ifp->if_data_counters[ifc_obytes];
	data->ifi_imcasts = ifp->if_data_counters[ifc_imcasts];
	data->ifi_omcasts = ifp->if_data_counters[ifc_omcasts];
	data->ifi_iqdrops = ifp->if_data_counters[ifc_iqdrops];
	data->ifi_oqdrops = ifp->if_data_counters[ifc_oqdrops];
	data->ifi_noproto = ifp->if_data_counters[ifc_noproto];

	/* add the per-CPU counters, if the driver allocated them */
	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters),
		    NULL);

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* fold in the per-queue transmit statistics */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* fold in the per-queue receive statistics */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2852 
2853 /*
2854  * Dummy functions replaced in ifnet during detach (if protocols decide to
2855  * fiddle with the if during detach.
2856  */
/* Detached start routine: drop everything still queued for transmit. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2862 
2863 int
if_detached_ioctl(struct ifnet * ifp,u_long a,caddr_t b)2864 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2865 {
2866 	return ENODEV;
2867 }
2868 
/* Take a temporary iteration reference on an interface group. */
static inline void
ifgroup_icref(struct ifg_group *ifg)
{
	refcnt_take(&ifg->ifg_tmprefcnt);
}
2874 
/*
 * Drop a temporary iteration reference on an interface group and free
 * it once the last reference is gone.
 */
static inline void
ifgroup_icrele(struct ifg_group *ifg)
{
	if (refcnt_rele(&ifg->ifg_tmprefcnt) != 0)
		free(ifg, M_IFGROUP, sizeof(*ifg));
}
2881 
2882 /*
2883  * Create interface group without members
2884  */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_IFGROUP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 1;	/* reference held by the global list */
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
	refcnt_init(&ifg->ifg_tmprefcnt);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2905 
2906 /*
2907  * Add a group to an interface
2908  */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;
	size_t			 namelen;

	/*
	 * Reject empty or over-long names, and names ending in a digit:
	 * those would be ambiguous with interface unit names.
	 */
	namelen = strlen(groupname);
	if (namelen == 0 || namelen >= IFNAMSIZ ||
	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* allocate both link structures up front so failure is easy to undo */
	if ((ifgl = malloc(sizeof(*ifgl), M_IFGROUP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_IFGROUP, M_NOWAIT)) == NULL) {
		free(ifgl, M_IFGROUP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* find the group, creating it on first use */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL) {
		ifg = if_creategroup(groupname);
		if (ifg == NULL) {
			free(ifgl, M_IFGROUP, sizeof(*ifgl));
			free(ifgm, M_IFGROUP, sizeof(*ifgm));
			return (ENOMEM);
		}
	} else
		ifg->ifg_refcnt++;
	KASSERT(ifg->ifg_refcnt != 0);

	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	/* link the interface into the group and the group into the interface */
	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname);
#endif

	return (0);
}
2961 
2962 /*
2963  * Remove a group from an interface
2964  */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	/* find the interface's link to the named group */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* remove the interface from the group's member list */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_IFGROUP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_delmember(groupname);
#endif

	/* destroy the group once its last member is gone */
	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		ifgroup_icrele(ifgl->ifgl_group);
	}

	free(ifgl, M_IFGROUP, sizeof(*ifgl));

	return (0);
}
3005 
3006 /*
3007  * Stores all groups from an interface in memory pointed
3008  * to by data
3009  */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	TAILQ_HEAD(, ifg_group)	 ifg_tmplist =
	    TAILQ_HEAD_INITIALIZER(ifg_tmplist);
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	 *ifg;
	int			 len, error = 0;

	/* zero length is a size probe: report the space needed */
	if (ifgr->ifgr_len == 0) {
		NET_LOCK_SHARED();
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		NET_UNLOCK_SHARED();
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;

	rw_enter_write(&if_tmplist_lock);

	/*
	 * Collect referenced groups on a temporary list under the net
	 * lock, then copy out without holding it (copyout may sleep).
	 */
	NET_LOCK_SHARED();
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		ifgroup_icref(ifgl->ifgl_group);
		TAILQ_INSERT_TAIL(&ifg_tmplist, ifgl->ifgl_group, ifg_tmplist);
	}
	NET_UNLOCK_SHARED();

	TAILQ_FOREACH(ifg, &ifg_tmplist, ifg_tmplist) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}

	/* drop the temporary references in all cases */
	while ((ifg = TAILQ_FIRST(&ifg_tmplist))){
		TAILQ_REMOVE(&ifg_tmplist, ifg, ifg_tmplist);
		ifgroup_icrele(ifg);
	}

	rw_exit_write(&if_tmplist_lock);

	return (error);
}
3065 
3066 /*
3067  * Stores all members of a group in memory pointed to by data
3068  */
int
if_getgroupmembers(caddr_t data)
{
	TAILQ_HEAD(, ifnet)	if_tmplist =
	    TAILQ_HEAD_INITIALIZER(if_tmplist);
	struct ifnet		*ifp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error = 0;

	rw_enter_write(&if_tmplist_lock);
	NET_LOCK_SHARED();

	/* look the group up by name */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL) {
		error = ENOENT;
		goto unlock;
	}

	/* zero length is a size probe: report the space needed */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		goto unlock;
	}

	/*
	 * Reference the members onto a temporary list under the net
	 * lock, then copy out without holding it (copyout may sleep).
	 */
	TAILQ_FOREACH (ifgm, &ifg->ifg_members, ifgm_next) {
		if_ref(ifgm->ifgm_ifp);
		TAILQ_INSERT_TAIL(&if_tmplist, ifgm->ifgm_ifp, if_tmplist);
	}
	NET_UNLOCK_SHARED();

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;

	TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}

	/* release the temporary interface references in all cases */
	while ((ifp = TAILQ_FIRST(&if_tmplist))) {
		TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist);
		if_put(ifp);
	}
	rw_exit_write(&if_tmplist_lock);

	return (error);

unlock:
	NET_UNLOCK_SHARED();
	rw_exit_write(&if_tmplist_lock);

	return (error);
}
3136 
3137 int
if_getgroupattribs(caddr_t data)3138 if_getgroupattribs(caddr_t data)
3139 {
3140 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
3141 	struct ifg_group	*ifg;
3142 
3143 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
3144 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
3145 			break;
3146 	if (ifg == NULL)
3147 		return (ENOENT);
3148 
3149 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
3150 
3151 	return (0);
3152 }
3153 
/*
 * SIOCSIFGATTR: adjust a group's carp demotion counter by the signed
 * delta in the request and notify every member interface of the
 * change.  Rejects deltas that would move the counter outside 0..255.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* let each member react to the new demotion level */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
3180 
3181 /*
3182  * Stores all groups in memory pointed to by data
3183  */
int
if_getgrouplist(caddr_t data)
{
	TAILQ_HEAD(, ifg_group)	 ifg_tmplist =
	    TAILQ_HEAD_INITIALIZER(ifg_tmplist);
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error = 0;

	/* zero length is a size probe: report the space needed */
	if (ifgr->ifgr_len == 0) {
		NET_LOCK_SHARED();
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		NET_UNLOCK_SHARED();
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;

	rw_enter_write(&if_tmplist_lock);

	/*
	 * Collect referenced groups on a temporary list under the net
	 * lock, then copy out without holding it (copyout may sleep).
	 */
	NET_LOCK_SHARED();
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		ifgroup_icref(ifg);
		TAILQ_INSERT_TAIL(&ifg_tmplist, ifg, ifg_tmplist);
	}
	NET_UNLOCK_SHARED();

	TAILQ_FOREACH(ifg, &ifg_tmplist, ifg_tmplist) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}

	/* drop the temporary references in all cases */
	while ((ifg = TAILQ_FIRST(&ifg_tmplist))){
		TAILQ_REMOVE(&ifg_tmplist, ifg, ifg_tmplist);
		ifgroup_icrele(ifg);
	}

	rw_exit_write(&if_tmplist_lock);

	return (error);
}
3238 
/*
 * Called on route changes: if a default route (destination and mask
 * both "any") was added or removed, rebuild the "egress" group so it
 * tracks the interfaces carrying default routes.
 */
void
if_group_routechange(const struct sockaddr *dst, const struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin_const(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin_const(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6_const(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6_const(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
3260 
/*
 * Rebuild the IFG_EGRESS group from scratch: empty it, then re-add
 * every interface that currently carries an IPv4 or IPv6 default
 * route in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* drop all current members, if the group exists at all */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* walk all IPv4 default routes and re-add their interfaces */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* same for IPv6 default routes */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3310 
3311 /*
3312  * Set/clear promiscuous mode on interface ifp based on the truth value
3313  * of pswitch.  The calls are reference counted so that only the first
3314  * "on" request actually has an effect, as does the final "off" request.
3315  * Results are undefined if the "off" and "on" requests are not matched.
3316  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* remember the old state so a failed driver ioctl can be undone */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* only the first "on" request actually sets the flag */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* only the last "off" request actually clears the flag */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* a down interface picks the flag up when it is brought up */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* roll back both the flag and the reference count */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
3351 
3352 /* Set/clear LRO flag and restart interface if needed. */
int
ifsetlro(struct ifnet *ifp, int on)
{
	struct ifreq ifrq;
	int error = 0;
	int s = splnet();
	struct if_parent parent;

	/* propagate the setting to the parent interface first, if any */
	memset(&parent, 0, sizeof(parent));
	if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) {
		struct ifnet *ifp0 = if_unit(parent.ifp_parent);

		if (ifp0 != NULL) {
			/* errors of the parent are ignored here */
			ifsetlro(ifp0, on);
			if_put(ifp0);
		}
	}

	if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
		error = ENOTSUP;
		goto out;
	}

	NET_ASSERT_LOCKED();	/* for ioctl */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
		/* LRO frames must not be bridged, refuse while bridged */
		if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) {
			error = EBUSY;
			goto out;
		}
		SET(ifp->if_xflags, IFXF_LRO);
	} else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
		CLR(ifp->if_xflags, IFXF_LRO);
	else
		goto out;	/* no change */

	/* restart interface */
	if (ISSET(ifp->if_flags, IFF_UP)) {
		/* go down for a moment... */
		CLR(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

		/* ... and up again */
		SET(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
 out:
	splx(s);

	return error;
}
3407 
/* Append an address to an interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3414 
/* Remove an address from an interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3421 
/*
 * Overwrite an address's broadcast address in place.  The new sockaddr
 * must have exactly the same length as the stored one.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3429 
3430 #ifdef DDB
3431 /* debug function, can be called from ddb> */
/* debug function, can be called from ddb> */
/* Print every IPv4/IPv6 address of every interface to the console. */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3460 #endif /* DDB */
3461 
3462 void
ifnewlladdr(struct ifnet * ifp)3463 ifnewlladdr(struct ifnet *ifp)
3464 {
3465 #ifdef INET6
3466 	struct ifaddr *ifa;
3467 	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
3468 #endif
3469 	struct ifreq ifrq;
3470 	short up;
3471 
3472 	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
3473 	KERNEL_ASSERT_LOCKED();	/* for if_flags */
3474 
3475 	up = ifp->if_flags & IFF_UP;
3476 
3477 	if (up) {
3478 		/* go down for a moment... */
3479 		ifp->if_flags &= ~IFF_UP;
3480 		ifrq.ifr_flags = ifp->if_flags;
3481 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3482 	}
3483 
3484 	ifp->if_flags |= IFF_UP;
3485 	ifrq.ifr_flags = ifp->if_flags;
3486 	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3487 
3488 #ifdef INET6
3489 	/*
3490 	 * Update the link-local address.  Don't do it if we're
3491 	 * a router to avoid confusing hosts on the network.
3492 	 */
3493 	if (!i_am_router) {
3494 		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
3495 		if (ifa) {
3496 			in6_purgeaddr(ifa);
3497 			if_hooks_run(&ifp->if_addrhooks);
3498 			in6_ifattach(ifp);
3499 		}
3500 	}
3501 #endif
3502 	if (!up) {
3503 		/* go back down */
3504 		ifp->if_flags &= ~IFF_UP;
3505 		ifrq.ifr_flags = ifp->if_flags;
3506 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3507 	}
3508 }
3509 
/*
 * Register a task to run when the interface's addresses change.
 */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	/* The hook list is shared; serialize with the hooks mutex. */
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3517 
/*
 * Unregister a previously added address-change task.
 */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	/* The hook list is shared; serialize with the hooks mutex. */
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3525 
/*
 * Run all address-change hooks registered on this interface.
 */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3531 
3532 void
if_rxr_init(struct if_rxring * rxr,u_int lwm,u_int hwm)3533 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3534 {
3535 	extern int ticks;
3536 
3537 	memset(rxr, 0, sizeof(*rxr));
3538 
3539 	rxr->rxr_adjusted = ticks;
3540 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3541 	rxr->rxr_hwm = hwm;
3542 }
3543 
3544 static inline void
if_rxr_adjust_cwm(struct if_rxring * rxr)3545 if_rxr_adjust_cwm(struct if_rxring *rxr)
3546 {
3547 	extern int ticks;
3548 
3549 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3550 		return;
3551 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3552 		rxr->rxr_cwm++;
3553 
3554 	rxr->rxr_adjusted = ticks;
3555 }
3556 
3557 void
if_rxr_livelocked(struct if_rxring * rxr)3558 if_rxr_livelocked(struct if_rxring *rxr)
3559 {
3560 	extern int ticks;
3561 
3562 	if (ticks - rxr->rxr_adjusted >= 1) {
3563 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3564 			rxr->rxr_cwm--;
3565 
3566 		rxr->rxr_adjusted = ticks;
3567 	}
3568 }
3569 
3570 u_int
if_rxr_get(struct if_rxring * rxr,u_int max)3571 if_rxr_get(struct if_rxring *rxr, u_int max)
3572 {
3573 	extern int ticks;
3574 	u_int diff;
3575 
3576 	if (ticks - rxr->rxr_adjusted >= 1) {
3577 		/* we're free to try for an adjustment */
3578 		if_rxr_adjust_cwm(rxr);
3579 	}
3580 
3581 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3582 		return (0);
3583 
3584 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3585 	rxr->rxr_alive += diff;
3586 
3587 	return (diff);
3588 }
3589 
3590 int
if_rxr_info_ioctl(struct if_rxrinfo * uifri,u_int t,struct if_rxring_info * e)3591 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
3592 {
3593 	struct if_rxrinfo kifri;
3594 	int error;
3595 	u_int n;
3596 
3597 	error = copyin(uifri, &kifri, sizeof(kifri));
3598 	if (error)
3599 		return (error);
3600 
3601 	n = min(t, kifri.ifri_total);
3602 	kifri.ifri_total = t;
3603 
3604 	if (n > 0) {
3605 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
3606 		if (error)
3607 			return (error);
3608 	}
3609 
3610 	return (copyout(&kifri, uifri, sizeof(kifri)));
3611 }
3612 
3613 int
if_rxr_ioctl(struct if_rxrinfo * ifri,const char * name,u_int size,struct if_rxring * rxr)3614 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3615     struct if_rxring *rxr)
3616 {
3617 	struct if_rxring_info ifr;
3618 
3619 	memset(&ifr, 0, sizeof(ifr));
3620 
3621 	if (name != NULL)
3622 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3623 
3624 	ifr.ifr_size = size;
3625 	ifr.ifr_info = *rxr;
3626 
3627 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3628 }
3629 
3630 /*
3631  * Network stack input queues.
3632  */
3633 
/*
 * Initialize a network input queue: a bounded mbuf queue plus the
 * netisr to schedule when packets are queued on it.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3640 
3641 int
niq_enqueue(struct niqueue * niq,struct mbuf * m)3642 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3643 {
3644 	int rv;
3645 
3646 	rv = mq_enqueue(&niq->ni_q, m);
3647 	if (rv == 0)
3648 		schednetisr(niq->ni_isr);
3649 	else
3650 		if_congestion();
3651 
3652 	return (rv);
3653 }
3654 
3655 int
niq_enlist(struct niqueue * niq,struct mbuf_list * ml)3656 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3657 {
3658 	int rv;
3659 
3660 	rv = mq_enlist(&niq->ni_q, ml);
3661 	if (rv == 0)
3662 		schednetisr(niq->ni_isr);
3663 	else
3664 		if_congestion();
3665 
3666 	return (rv);
3667 }
3668 
/*
 * Panic when an address family reaches code that should handle
 * every family; indicates a programming error, not bad input.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3674 
3675 struct taskq *
net_tq(unsigned int ifindex)3676 net_tq(unsigned int ifindex)
3677 {
3678 	struct softnet *sn;
3679 	static int nettaskqs;
3680 
3681 	if (nettaskqs == 0)
3682 		nettaskqs = min(NET_TASKQ, ncpus);
3683 
3684 	sn = &softnets[ifindex % nettaskqs];
3685 
3686 	return (sn->sn_taskq);
3687 }
3688 
3689 void
net_tq_barriers(const char * wmesg)3690 net_tq_barriers(const char *wmesg)
3691 {
3692 	struct task barriers[NET_TASKQ];
3693 	struct refcnt r = REFCNT_INITIALIZER();
3694 	int i;
3695 
3696 	for (i = 0; i < nitems(barriers); i++) {
3697 		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake, &r);
3698 		refcnt_take(&r);
3699 		task_add(softnets[i].sn_taskq, &barriers[i]);
3700 	}
3701 
3702 	refcnt_finalize(&r, wmesg);
3703 }
3704