1 /* $OpenBSD: if.c,v 1.725 2025/01/25 10:53:36 mvs Exp $ */
2 /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1980, 1986, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)if.c 8.3 (Berkeley) 1/4/94
62 */
63
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "ppp.h"
70 #include "pppoe.h"
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/mbuf.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/timeout.h>
78 #include <sys/protosw.h>
79 #include <sys/kernel.h>
80 #include <sys/ioctl.h>
81 #include <sys/domain.h>
82 #include <sys/task.h>
83 #include <sys/atomic.h>
84 #include <sys/percpu.h>
85 #include <sys/proc.h>
86 #include <sys/stdint.h> /* uintptr_t */
87 #include <sys/rwlock.h>
88 #include <sys/smr.h>
89
90 #include <net/if.h>
91 #include <net/if_dl.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/netisr.h>
95
96 #include "vlan.h"
97 #if NVLAN > 0
98 #include <net/if_vlan_var.h>
99 #endif
100
101 #include <netinet/in.h>
102 #include <netinet/if_ether.h>
103 #include <netinet/igmp.h>
104 #ifdef MROUTING
105 #include <netinet/ip_mroute.h>
106 #endif
107 #include <netinet/tcp.h>
108 #include <netinet/tcp_timer.h>
109 #include <netinet/tcp_var.h>
110
111 #ifdef INET6
112 #include <netinet6/in6_var.h>
113 #include <netinet6/in6_ifattach.h>
114 #include <netinet6/nd6.h>
115 #include <netinet/ip6.h>
116 #include <netinet6/ip6_var.h>
117 #endif
118
119 #ifdef MPLS
120 #include <netmpls/mpls.h>
121 #endif
122
123 #if NBPFILTER > 0
124 #include <net/bpf.h>
125 #endif
126
127 #if NBRIDGE > 0
128 #include <net/if_bridge.h>
129 #endif
130
131 #if NCARP > 0
132 #include <netinet/ip_carp.h>
133 #endif
134
135 #if NPF > 0
136 #include <net/pfvar.h>
137 #endif
138
139 #include <sys/device.h>
140
141 void if_attachsetup(struct ifnet *);
142 void if_attach_common(struct ifnet *);
143 void if_remove(struct ifnet *);
144 int if_createrdomain(int, struct ifnet *);
145 int if_setrdomain(struct ifnet *, int);
146 void if_slowtimo(void *);
147
148 void if_detached_qstart(struct ifqueue *);
149 int if_detached_ioctl(struct ifnet *, u_long, caddr_t);
150
151 int ifioctl_get(u_long, caddr_t);
152 int ifconf(caddr_t);
153 static int
154 if_sffpage_check(const caddr_t);
155
156 int if_getgroup(caddr_t, struct ifnet *);
157 int if_getgroupmembers(caddr_t);
158 int if_getgroupattribs(caddr_t);
159 int if_setgroupattribs(caddr_t);
160 int if_getgrouplist(caddr_t);
161
162 void if_linkstate(struct ifnet *);
163 void if_linkstate_task(void *);
164
165 int if_clone_list(struct if_clonereq *);
166 struct if_clone *if_clone_lookup(const char *, int *);
167
168 int if_group_egress_build(void);
169
170 void if_watchdog_task(void *);
171
172 void if_netisr(void *);
173
174 #ifdef DDB
175 void ifa_print_all(void);
176 #endif
177
178 void if_qstart_compat(struct ifqueue *);
179
180 /*
181 * interface index map
182 *
183 * the kernel maintains a mapping of interface indexes to struct ifnet
184 * pointers.
185 *
186 * the map is an array of struct ifnet pointers prefixed by an if_map
187 * structure. the if_map structure stores the length of its array.
188 *
189 * as interfaces are attached to the system, the map is grown on demand
190 * up to USHRT_MAX entries.
191 *
192 * interface index 0 is reserved and represents no interface. this
193 * supports the use of the interface index as the scope for IPv6 link
194 * local addresses, where scope 0 means no scope has been specified.
195 * it also supports the use of interface index as the unique identifier
196 * for network interfaces in SNMP applications as per RFC2863. therefore
197 * if_get(0) returns NULL.
198 */
199
200 struct ifnet *if_ref(struct ifnet *);
201
202 /*
203 * struct if_idxmap
204 *
205 * infrastructure to manage updates and accesses to the current if_map.
206 *
207 * interface index 0 is special and represents "no interface", so we
208 * use the 0th slot in map to store the length of the array.
209 */
210
/*
 * Global state for the interface index map.  The `map' array itself is
 * read under SMR; all other fields are protected by `lock'.
 */
struct if_idxmap {
	unsigned int serial;		/* next candidate interface index */
	unsigned int count;		/* number of allocated indices */
	struct ifnet **map;		/* SMR protected */
	struct rwlock lock;		/* serializes map updates */
	unsigned char *usedidx;	/* bitmap of indices in use */
};
218
/*
 * Deferred-free record for a retired if_map.  The old usedidx bitmap is
 * recycled as this structure (see if_idxmap_alloc()), which is why
 * if_idxmap_usedidx_size() never returns less than sizeof(*dtor).
 */
struct if_idxmap_dtor {
	struct smr_entry smr;	/* queued via smr_call() */
	struct ifnet **map;	/* old array to release after grace period */
};
223
224 void if_idxmap_init(unsigned int);
225 void if_idxmap_free(void *);
226 void if_idxmap_alloc(struct ifnet *);
227 void if_idxmap_insert(struct ifnet *);
228 void if_idxmap_remove(struct ifnet *);
229
TAILQ_HEAD(, ifg_group) ifg_head =
    TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */

LIST_HEAD(, if_clone) if_cloners =
    LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
int if_cloners_count;	/* [I] number of clonable interfaces */

struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
struct rwlock if_tmplist_lock = RWLOCK_INITIALIZER("iftmplk");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void if_hooks_run(struct task_list *);

int ifq_congestion;	/* set while the transmit path is congested */

int netisr;		/* bitmask of pending NETISR_* soft interrupts */

/* per-thread softnet state: a named taskq for deferred packet work */
struct softnet {
	char sn_name[16];	/* taskq name, "softnetN" */
	struct taskq *sn_taskq;
};

#define NET_TASKQ	4	/* number of softnet taskqs */
struct softnet softnets[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
263
264 /*
265 * Network interface utility routines.
266 */
267 void
ifinit(void)268 ifinit(void)
269 {
270 unsigned int i;
271
272 /*
273 * most machines boot with 4 or 5 interfaces, so size the initial map
274 * to accommodate this
275 */
276 if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
277
278 for (i = 0; i < NET_TASKQ; i++) {
279 struct softnet *sn = &softnets[i];
280 snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
281 sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
282 TASKQ_MPSAFE);
283 if (sn->sn_taskq == NULL)
284 panic("unable to create network taskq %d", i);
285 }
286 }
287
/* the single system-wide index map instance; see struct if_idxmap above */
static struct if_idxmap if_idxmap;

/*
 * XXXSMP: For `ifnetlist' modification both kernel and net locks
 * should be taken. For read-only access only one lock of them required.
 */
struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
295
296 static inline unsigned int
if_idxmap_limit(struct ifnet ** if_map)297 if_idxmap_limit(struct ifnet **if_map)
298 {
299 return ((uintptr_t)if_map[0]);
300 }
301
302 static inline size_t
if_idxmap_usedidx_size(unsigned int limit)303 if_idxmap_usedidx_size(unsigned int limit)
304 {
305 return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
306 }
307
/*
 * Set up the global interface index map with an initial capacity of
 * `limit' slots.  Called once from ifinit() before any interface can
 * attach, so no locking or SMR publication ordering is needed yet.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct ifnet **if_map;

	rw_init(&if_idxmap.lock, "idxmaplk");
	if_idxmap.serial = 1; /* skip ifidx 0 */

	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
	    M_WAITOK | M_ZERO);

	/* slot 0 stores the array length; see if_idxmap_limit() */
	if_map[0] = (struct ifnet *)(uintptr_t)limit;

	/* sized so it can later be recycled as an smr dtor record */
	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
	    M_IFADDR, M_WAITOK | M_ZERO);
	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */

	/* this is called early so there's nothing to race with */
	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
}
328
/*
 * Reserve a unique interface index for ifp and initialize its refcnt.
 * Grows the map (doubling it) when the candidate index is beyond the
 * current limit; the old map is released via SMR after a grace period.
 * The index is only reserved in the usedidx bitmap here; the ifp
 * pointer itself is published later by if_idxmap_insert().
 */
void
if_idxmap_alloc(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int limit;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	rw_enter_write(&if_idxmap.lock);

	if (++if_idxmap.count >= USHRT_MAX)
		panic("too many interfaces");

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
	limit = if_idxmap_limit(if_map);

	/* indices are 16-bit; mask keeps serial within that range */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= limit) {
		struct if_idxmap_dtor *dtor;
		struct ifnet **oif_map;
		unsigned int olimit;
		unsigned char *nusedidx;

		oif_map = if_map;
		olimit = limit;

		/* double the map and copy the live entries across */
		limit = olimit * 2;
		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
		    M_WAITOK | M_ZERO);
		if_map[0] = (struct ifnet *)(uintptr_t)limit;

		for (i = 1; i < olimit; i++) {
			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
			if (oifp == NULL)
				continue;

			/*
			 * nif_map isn't visible yet, so don't need
			 * SMR_PTR_SET_LOCKED and its membar.
			 */
			if_map[i] = if_ref(oifp);
		}

		nusedidx = malloc(if_idxmap_usedidx_size(limit),
		    M_IFADDR, M_WAITOK | M_ZERO);
		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));

		/* use the old usedidx bitmap as an smr_entry for the if_map */
		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
		if_idxmap.usedidx = nusedidx;

		/* publish the new map before scheduling the old one's free */
		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);

		dtor->map = oif_map;
		smr_init(&dtor->smr);
		smr_call(&dtor->smr, if_idxmap_free, dtor);
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && isclr(if_idxmap.usedidx, index))
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}
	KASSERT(index != 0 && index < limit);
	KASSERT(isclr(if_idxmap.usedidx, index));

	setbit(if_idxmap.usedidx, index);
	ifp->if_index = index;

	rw_exit_write(&if_idxmap.lock);
}
404
405 void
if_idxmap_free(void * arg)406 if_idxmap_free(void *arg)
407 {
408 struct if_idxmap_dtor *dtor = arg;
409 struct ifnet **oif_map = dtor->map;
410 unsigned int olimit = if_idxmap_limit(oif_map);
411 unsigned int i;
412
413 for (i = 1; i < olimit; i++)
414 if_put(oif_map[i]);
415
416 free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
417 free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
418 }
419
420 void
if_idxmap_insert(struct ifnet * ifp)421 if_idxmap_insert(struct ifnet *ifp)
422 {
423 struct ifnet **if_map;
424 unsigned int index = ifp->if_index;
425
426 rw_enter_write(&if_idxmap.lock);
427
428 if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
429
430 KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
431 "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
432 if_idxmap_limit(if_map));
433 KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
434 KASSERT(isset(if_idxmap.usedidx, index));
435
436 /* commit */
437 SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));
438
439 rw_exit_write(&if_idxmap.lock);
440 }
441
/*
 * Unpublish ifp from the index map so if_get() can no longer find it,
 * and release the map's reference once all SMR readers have moved on.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
	KASSERT(isset(if_idxmap.usedidx, index));

	SMR_PTR_SET_LOCKED(&if_map[index], NULL);

	if_idxmap.count--;
	clrbit(if_idxmap.usedidx, index);
	/* end of if_idxmap modifications */

	rw_exit_write(&if_idxmap.lock);

	/* wait for SMR readers before dropping the map's reference */
	smr_barrier();
	if_put(ifp);
}
467
468 /*
469 * Attach an interface to the
470 * list of "active" interfaces.
471 */
472 void
if_attachsetup(struct ifnet * ifp)473 if_attachsetup(struct ifnet *ifp)
474 {
475 unsigned long ifidx;
476
477 NET_ASSERT_LOCKED();
478
479 if_addgroup(ifp, IFG_ALL);
480
481 #ifdef INET6
482 nd6_ifattach(ifp);
483 #endif
484
485 #if NPF > 0
486 pfi_attach_ifnet(ifp);
487 #endif
488
489 timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
490 if_slowtimo(ifp);
491
492 if_idxmap_insert(ifp);
493 KASSERT(if_get(0) == NULL);
494
495 ifidx = ifp->if_index;
496
497 task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
498 task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);
499
500 /* Announce the interface. */
501 rtm_ifannounce(ifp, IFAN_ARRIVAL);
502 }
503
504 /*
505 * Allocate the link level name for the specified interface. This
506 * is an attachment helper. It must be called after ifp->if_addrlen
507 * is initialized, which may not be the case when if_attach() is
508 * called.
509 */
510 void
if_alloc_sadl(struct ifnet * ifp)511 if_alloc_sadl(struct ifnet *ifp)
512 {
513 unsigned int socksize;
514 int namelen, masklen;
515 struct sockaddr_dl *sdl;
516
517 /*
518 * If the interface already has a link name, release it
519 * now. This is useful for interfaces that can change
520 * link types, and thus switch link names often.
521 */
522 if_free_sadl(ifp);
523
524 namelen = strlen(ifp->if_xname);
525 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
526 socksize = masklen + ifp->if_addrlen;
527 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
528 if (socksize < sizeof(*sdl))
529 socksize = sizeof(*sdl);
530 socksize = ROUNDUP(socksize);
531 sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
532 sdl->sdl_len = socksize;
533 sdl->sdl_family = AF_LINK;
534 bcopy(ifp->if_xname, sdl->sdl_data, namelen);
535 sdl->sdl_nlen = namelen;
536 sdl->sdl_alen = ifp->if_addrlen;
537 sdl->sdl_index = ifp->if_index;
538 sdl->sdl_type = ifp->if_type;
539 ifp->if_sadl = sdl;
540 }
541
542 /*
543 * Free the link level name for the specified interface. This is
544 * a detach helper. This is called from if_detach() or from
545 * link layer type specific detach functions.
546 */
547 void
if_free_sadl(struct ifnet * ifp)548 if_free_sadl(struct ifnet *ifp)
549 {
550 if (ifp->if_sadl == NULL)
551 return;
552
553 free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
554 ifp->if_sadl = NULL;
555 }
556
/*
 * Like if_attach(), but inserts the interface at the HEAD of the
 * global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
566
/*
 * Standard interface attach: common initialization, then append the
 * interface to the global list and finish setup under the net lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
576
577 void
if_attach_queues(struct ifnet * ifp,unsigned int nqs)578 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
579 {
580 struct ifqueue **map;
581 struct ifqueue *ifq;
582 int i;
583
584 KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
585 KASSERT(nqs != 0);
586
587 map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
588
589 ifp->if_snd.ifq_softc = NULL;
590 map[0] = &ifp->if_snd;
591
592 for (i = 1; i < nqs; i++) {
593 ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
594 ifq_init_maxlen(ifq, ifp->if_snd.ifq_maxlen);
595 ifq_init(ifq, ifp, i);
596 map[i] = ifq;
597 }
598
599 ifp->if_ifqs = map;
600 ifp->if_nifqs = nqs;
601 }
602
603 void
if_attach_iqueues(struct ifnet * ifp,unsigned int niqs)604 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
605 {
606 struct ifiqueue **map;
607 struct ifiqueue *ifiq;
608 unsigned int i;
609
610 KASSERT(niqs != 0);
611
612 map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
613
614 ifp->if_rcv.ifiq_softc = NULL;
615 map[0] = &ifp->if_rcv;
616
617 for (i = 1; i < niqs; i++) {
618 ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
619 ifiq_init(ifiq, ifp, i);
620 map[i] = ifiq;
621 }
622
623 ifp->if_iqs = map;
624 ifp->if_niqs = niqs;
625 }
626
/*
 * Driver-independent attach work shared by if_attach() and
 * if_attachhead(): list/hook initialization, index allocation, default
 * send/receive queue setup, and default method pointers.  Runs before
 * the interface is linked into ifnetlist.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	/* legacy (non-MPSAFE) drivers provide if_start, not if_qstart */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* reserve if_index before the queues reference it */
	if_idxmap_alloc(ifp);

	ifq_init(&ifp->if_snd, ifp, 0);

	/* single transmit queue by default; see if_attach_queues() */
	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	ifiq_init(&ifp->if_rcv, ifp, 0);

	/* single receive queue by default; see if_attach_iqueues() */
	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* fill in defaults for methods the driver left unset */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
677
678 void
if_attach_ifq(struct ifnet * ifp,const struct ifq_ops * newops,void * args)679 if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
680 {
681 /*
682 * only switch the ifq_ops on the first ifq on an interface.
683 *
684 * the only ifq_ops we provide priq and hfsc, and hfsc only
685 * works on a single ifq. because the code uses the ifq_ops
686 * on the first ifq (if_snd) to select a queue for an mbuf,
687 * by switching only the first one we change both the algorithm
688 * and force the routing of all new packets to it.
689 */
690 ifq_attach(&ifp->if_snd, newops, args);
691 }
692
/*
 * Kick the transmit path of a legacy (non-MPSAFE) driver.  Only valid
 * when if_qstart was defaulted to if_qstart_compat in
 * if_attach_common().
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
699 void
if_qstart_compat(struct ifqueue * ifq)700 if_qstart_compat(struct ifqueue *ifq)
701 {
702 struct ifnet *ifp = ifq->ifq_if;
703 int s;
704
705 /*
706 * the stack assumes that an interface can have multiple
707 * transmit rings, but a lot of drivers are still written
708 * so that interfaces and send rings have a 1:1 mapping.
709 * this provides compatibility between the stack and the older
710 * drivers by translating from the only queue they have
711 * (ifp->if_snd) back to the interface and calling if_start.
712 */
713
714 KERNEL_LOCK();
715 s = splnet();
716 (*ifp->if_start)(ifp);
717 splx(s);
718 KERNEL_UNLOCK();
719 }
720
/*
 * Hand an outgoing packet to the interface's transmit path, after
 * optional pf delay and bridge diversion.  Consumes the mbuf; returns
 * 0 or an errno from the underlying enqueue method.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	/* pf may have scheduled this packet for delayed transmission */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* member of a bridge: divert unless already bridged (M_PROTO1) */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif /* NPF > 0 */

	/* usually if_enqueue_ifq(), unless the driver overrode it */
	return ((*ifp->if_enqueue)(ifp, m));
}
746
747 int
if_enqueue_ifq(struct ifnet * ifp,struct mbuf * m)748 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
749 {
750 struct ifqueue *ifq = &ifp->if_snd;
751 int error;
752
753 if (ifp->if_nifqs > 1) {
754 unsigned int idx;
755
756 /*
757 * use the operations on the first ifq to pick which of
758 * the array gets this mbuf.
759 */
760
761 idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
762 ifq = ifp->if_ifqs[idx];
763 }
764
765 error = ifq_enqueue(ifq, m);
766 if (error)
767 return (error);
768
769 ifq_start(ifq);
770
771 return (0);
772 }
773
/*
 * Queue a list of received packets on the interface's first receive
 * queue for deferred processing.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
779
/*
 * Loop a packet back into the local stack as if received on ifp.
 * Used for loopback interfaces and for duplicating packets on SIMPLEX
 * interfaces.  Consumes the mbuf; returns 0 or an errno.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags, keepcksum;
	uint16_t keepmss;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use. But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	/*
	 * Preserve outgoing checksum flags, in case the packet is
	 * forwarded to another interface. Then the checksum, which
	 * is now incorrect, will be calculated before sending.
	 */
	keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
	    M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
	    M_TCP_TSO);
	keepmss = m->m_pkthdr.ph_mss;
	/* reset the header, then restore the fields worth keeping */
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.csum_flags = keepcksum;
	m->m_pkthdr.ph_mss = keepmss;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* oversized TSO packets are only allowed if the hw could do TSO */
	if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
		if (ifp->if_mtu > 0 &&
		    ((af == AF_INET &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
		    (af == AF_INET6 &&
		    ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
			tcpstat_inc(tcps_inswlro);
			tcpstat_add(tcps_inpktlro,
			    (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
		} else {
			tcpstat_inc(tcps_inbadlro);
			m_freem(m);
			return (EPROTONOSUPPORT);
		}
	}

	/* the packet never hit a wire, so its checksums are trustworthy */
	if (ISSET(keepcksum, M_TCP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_UDP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
	if (ISSET(keepcksum, M_ICMP_CSUM_OUT))
		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_IN_OK;

	/* do not count multicast loopback and simplex interfaces */
	if (ISSET(ifp->if_flags, IFF_LOOPBACK)) {
		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
		    m->m_pkthdr.len);
	}

	switch (af) {
	case AF_INET:
		if (ISSET(keepcksum, M_IPV4_CSUM_OUT))
			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
871
872 int
if_output_ml(struct ifnet * ifp,struct mbuf_list * ml,struct sockaddr * dst,struct rtentry * rt)873 if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
874 struct sockaddr *dst, struct rtentry *rt)
875 {
876 struct mbuf *m;
877 int error = 0;
878
879 while ((m = ml_dequeue(ml)) != NULL) {
880 error = ifp->if_output(ifp, m, dst, rt);
881 if (error)
882 break;
883 }
884 if (error)
885 ml_purge(ml);
886
887 return error;
888 }
889
/*
 * Output helper for possibly-oversized TCP packets.  Lets the TCP
 * layer try TSO (or software chopping) first; if the packet comes
 * back and fits the mtu, finalize its checksums and send it.  On
 * return, *mp is NULL if the packet was consumed, otherwise it still
 * has to be fragmented or dropped by the caller.
 */
int
if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
    struct rtentry *rt, u_int mtu)
{
	uint32_t ifcap;
	int error;

	switch (dst->sa_family) {
	case AF_INET:
		ifcap = IFCAP_TSOv4;
		break;
#ifdef INET6
	case AF_INET6:
		ifcap = IFCAP_TSOv6;
		break;
#endif
	default:
		unhandled_af(dst->sa_family);
	}

	/*
	 * Try to send with TSO first. When forwarding LRO may set
	 * maximum segment size in mbuf header. Chop TCP segment
	 * even if it would fit interface MTU to preserve maximum
	 * path MTU.
	 */
	error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
	if (error || *mp == NULL)
		return error;

	if ((*mp)->m_pkthdr.len <= mtu) {
		/* checksums must be completed before handing off to hw */
		switch (dst->sa_family) {
		case AF_INET:
			in_hdr_cksum_out(*mp, ifp);
			in_proto_cksum_out(*mp, ifp);
			break;
#ifdef INET6
		case AF_INET6:
			in6_proto_cksum_out(*mp, ifp);
			break;
#endif
		}
		error = ifp->if_output(ifp, *mp, dst, rt);
		*mp = NULL;
		return error;
	}

	/* mp still contains mbuf that has to be fragmented or dropped. */
	return 0;
}
940
941 int
if_output_mq(struct ifnet * ifp,struct mbuf_queue * mq,unsigned int * total,struct sockaddr * dst,struct rtentry * rt)942 if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
943 struct sockaddr *dst, struct rtentry *rt)
944 {
945 struct mbuf_list ml;
946 unsigned int len;
947 int error;
948
949 mq_delist(mq, &ml);
950 len = ml_len(&ml);
951 error = if_output_ml(ifp, &ml, dst, rt);
952
953 /* XXXSMP we also discard if other CPU enqueues */
954 if (mq_len(mq) > 0) {
955 /* mbuf is back in queue. Discard. */
956 atomic_sub_int(total, len + mq_purge(mq));
957 } else
958 atomic_sub_int(total, len);
959
960 return error;
961 }
962
963 int
if_output_local(struct ifnet * ifp,struct mbuf * m,sa_family_t af)964 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
965 {
966 struct ifiqueue *ifiq;
967 unsigned int flow = 0;
968
969 m->m_pkthdr.ph_family = af;
970 m->m_pkthdr.ph_ifidx = ifp->if_index;
971 m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
972
973 if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
974 flow = m->m_pkthdr.ph_flowid;
975
976 ifiq = ifp->if_iqs[flow % ifp->if_niqs];
977
978 return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
979 }
980
/*
 * Run the protocol input routine over a list of received packets.
 * Called from the softnet threads; takes the shared net lock for the
 * duration of the list.  Consumes the list.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* real hardware traffic feeds the entropy pool; clones don't */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the shared netlock for packet processing in the softnet
	 * threads. Packets can regrab the exclusive lock via queues.
	 * ioctl, sysctl, and socket syscall may use shared lock if access is
	 * read only or MP safe. Usually they hold the exclusive net lock.
	 */

	NET_LOCK_SHARED();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK_SHARED();
}
1004
1005 void
if_vinput(struct ifnet * ifp,struct mbuf * m)1006 if_vinput(struct ifnet *ifp, struct mbuf *m)
1007 {
1008 #if NBPFILTER > 0
1009 caddr_t if_bpf;
1010 #endif
1011
1012 m->m_pkthdr.ph_ifidx = ifp->if_index;
1013 m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
1014
1015 counters_pkt(ifp->if_counters,
1016 ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
1017
1018 #if NPF > 0
1019 pf_pkt_addr_changed(m);
1020 #endif
1021
1022 #if NBPFILTER > 0
1023 if_bpf = ifp->if_bpf;
1024 if (if_bpf) {
1025 if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
1026 m_freem(m);
1027 return;
1028 }
1029 }
1030 #endif
1031
1032 if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
1033 (*ifp->if_input)(ifp, m);
1034 else
1035 m_freem(m);
1036 }
1037
1038 void
if_netisr(void * unused)1039 if_netisr(void *unused)
1040 {
1041 int n, t = 0;
1042
1043 NET_LOCK();
1044
1045 while ((n = netisr) != 0) {
1046 /* Like sched_pause() but with a rwlock dance. */
1047 if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
1048 NET_UNLOCK();
1049 yield();
1050 NET_LOCK();
1051 }
1052
1053 atomic_clearbits_int(&netisr, n);
1054
1055 #if NETHER > 0
1056 if (n & (1 << NETISR_ARP))
1057 arpintr();
1058 #endif
1059 if (n & (1 << NETISR_IP))
1060 ipintr();
1061 #ifdef INET6
1062 if (n & (1 << NETISR_IPV6))
1063 ip6intr();
1064 #endif
1065 #if NPPP > 0
1066 if (n & (1 << NETISR_PPP)) {
1067 KERNEL_LOCK();
1068 pppintr();
1069 KERNEL_UNLOCK();
1070 }
1071 #endif
1072 #if NBRIDGE > 0
1073 if (n & (1 << NETISR_BRIDGE))
1074 bridgeintr();
1075 #endif
1076 #ifdef PIPEX
1077 if (n & (1 << NETISR_PIPEX))
1078 pipexintr();
1079 #endif
1080 #if NPPPOE > 0
1081 if (n & (1 << NETISR_PPPOE)) {
1082 KERNEL_LOCK();
1083 pppoeintr();
1084 KERNEL_UNLOCK();
1085 }
1086 #endif
1087 t |= n;
1088 }
1089
1090 NET_UNLOCK();
1091 }
1092
/*
 * Run every task on a hook list, dropping if_hooks_mtx around each
 * callback.  A cursor node (t_func == NULL) is inserted after the
 * current task so the walk can resume safely even if hooks are added
 * or removed while the mutex is released.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };	/* NULL marks a cursor */
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		/* copy out; t may be freed once the mutex is dropped */
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
1121
/*
 * Make the interface unreachable: unlink it from the global list and
 * the index map, then block until every outstanding reference from
 * if_get()/if_ref() has been released.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
1136
/*
 * Run the interface's detach hooks under the net lock, giving
 * pseudo-drivers stacked on this interface a chance to undo their
 * changes before the interface is torn down.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1150
/*
 * Register a task to run when the interface is detached.  Inserted at
 * the head so hooks run in reverse registration order (see
 * if_deactivate()).
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1158
/*
 * Unregister a previously added detach hook.  The task must currently
 * be on the interface's detach hook list.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1166
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	/* From here on, transmit attempts hit the detached stub. */
	ifp->if_qstart = if_detached_qstart;

	/* Wait until the start routines finished. */
	ifq_barrier(&ifp->if_snd);
	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	/* Forget this interface if it was the reverse-ARP one. */
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Leave every interface group we are still a member of. */
	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}
	splx(s);
	NET_UNLOCK();

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

#ifdef INET6
	nd6_ifdetach(ifp);
#endif

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/*
	 * Destroy all send queues; the slots beyond the builtin one were
	 * separately allocated and must be freed as well.
	 */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* Same for the receive queues. */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1277
/*
 * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
 *
 * Two interfaces are "connected" when they are the same interface,
 * members of the same bridge, or related as carp(4) parent/child.
 */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
		connected = 1;
#endif
#if NCARP > 0
	/* carp interfaces are connected to their underlying device. */
	if ((ifp0->if_type == IFT_CARP &&
	    ifp0->if_carpdevidx == ifp->if_index) ||
	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
1308
/*
 * Create a clone network interface.
 *
 * ``name'' must be a cloner name followed by a unit number.  Returns
 * EINVAL for an unknown cloner, EEXIST when the interface already
 * exists, or the cloner's own error.  A non-zero ``rdomain'' moves the
 * new interface into that routing domain.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	/* Serialize against concurrent create/destroy. */
	rw_enter_write(&if_cloners_lock);

	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* ifp stays NULL if creation failed or the ifp vanished. */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	if_put(ifp);	/* handles NULL */

	return (ret);
}
1346
/*
 * Destroy a clone network interface.
 *
 * Returns EINVAL for an unknown cloner, EOPNOTSUPP when the cloner
 * cannot destroy instances, ENXIO when no such interface exists, or
 * the cloner's own error.  The interface is brought down first so
 * protocols see the transition.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	/* Look the interface up by exact name. */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1389
/*
 * Look up a network interface cloner.
 *
 * Splits ``name'' into a cloner name and a decimal unit number.  On
 * success returns the matching cloner and, if ``unitp'' is not NULL,
 * stores the parsed unit there.  Returns NULL for a missing name or
 * unit, a zero-padded unit (e.g. "lo01"), a unit that would overflow
 * an int, or an unknown cloner.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Find the cloner whose name matches the non-digit prefix. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1435
/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1452
/*
 * Provide list of interface cloners to userspace.
 *
 * With a NULL buffer only the total count is reported; otherwise up to
 * ifcr_count cloner names (IFNAMSIZ bytes each) are copied out.
 */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		ifcr->ifcr_total = if_cloners_count;
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	ifcr->ifcr_total = if_cloners_count;
	count = MIN(if_cloners_count, ifcr->ifcr_count);

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		/* Zero first so no kernel stack leaks to userland. */
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}
1489
/*
 * set queue congestion marker
 */
void
if_congestion(void)
{
	extern int ticks;

	/* Record the time of the most recent congestion event. */
	ifq_congestion = ticks;
}
1500
/*
 * Return non-zero if a congestion event was recorded within the last
 * hz/100 ticks.
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* Tick counter wrapped; reset the marker into the past. */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1515
/* Sockaddr equality: compare the first a1->sa_len bytes of both. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1519
/*
 * Locate an interface based on a complete address.
 *
 * Only interfaces in the routing domain of ``rtableid'' are
 * considered.  Returns the matching ifaddr or NULL.
 */
struct ifaddr *
ifa_ifwithaddr(const struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rtableid);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			if (equal(addr, ifa->ifa_addr)) {
				return (ifa);
			}
		}
	}
	return (NULL);
}
1548
/*
 * Locate the point to point interface with a given destination address.
 *
 * Only point-to-point interfaces in the routing domain of ``rdomain''
 * are considered.  Returns the matching ifaddr or NULL.
 */
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rdomain);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr)) {
					return (ifa);
				}
			}
		}
	}
	return (NULL);
}
1577
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 *
 * Prefers an exact (or dstaddr) match; otherwise returns the first
 * address whose netmask covers ``addr''; failing that, the first
 * address of the same family, or NULL.
 */
struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	const char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* fallback: same family */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			/* No mask to apply: only exact matches count. */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/* Compare addr and ifa_addr under the netmask, bytewise. */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1616
/* No-op if_rtrequest handler for drivers with nothing to do. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1621
/*
 * Default action when installing a local route on a point-to-point
 * interface.
 *
 * For RTM_ADD of an RTF_LOCAL route whose key is one of the
 * interface's own addresses, and for which the routing domain's
 * loopback interface carries an address of the same family, the
 * RTF_LLINFO flag is cleared.  Other requests are ignored.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Is the route's key one of our own addresses? */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* Find a same-family address on the rdomain's loopback. */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1668
/*
 * bpf tap helper for point-to-point interfaces: tag the packet with
 * its address family.  Compiles to a no-op without bpfilter(4).
 */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	return (0);
#endif
}
1678
/*
 * Input handler for point-to-point interfaces: dispatch the packet to
 * the protocol input routine matching its recorded address family, or
 * drop it if the family is unsupported.
 */
void
p2p_input(struct ifnet *ifp, struct mbuf *m)
{
	void (*input)(struct ifnet *, struct mbuf *);

	switch (m->m_pkthdr.ph_family) {
	case AF_INET:
		input = ipv4_input;
		break;
#ifdef INET6
	case AF_INET6:
		input = ipv6_input;
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		input = mpls_input;
		break;
#endif
	default:
		/* Unknown family: drop. */
		m_freem(m);
		return;
	}

	(*input)(ifp, m);
}
1705
/*
 * Bring down all interfaces
 */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Tell the driver its flags changed. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1725
/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop anything still queued for transmission. */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1741
/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1762
/*
 * Notify userland, the routing table and hooks owner of
 * a link-state transition.
 *
 * Task body scheduled by if_link_state_change(); the interface index
 * is smuggled through the task argument pointer.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	NET_LOCK();
	KERNEL_LOCK();

	/* The interface may already be gone; if_get() checks. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	KERNEL_UNLOCK();
	NET_UNLOCK();
}
1784
/*
 * Process a link-state change: inform the routing socket and routing
 * table, then run the interface's link-state hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Skip routing notifications while the system is panicking. */
	if (panicstr == NULL) {
		rtm_ifchg(ifp);
		rt_if_track(ifp);
	}

	if_hooks_run(&ifp->if_linkstatehooks);
}
1797
/*
 * Register a task to run on link-state changes.  Inserted at the head,
 * so hooks run in reverse registration order.
 */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1805
/*
 * Unregister a previously added link-state hook.
 */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1813
/*
 * Schedule a link state change task.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the work to the interface's net taskq. */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1822
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Fire the watchdog task when the countdown reaches zero. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm only while a watchdog routine is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1841
/*
 * Task body that invokes the driver's watchdog routine.  The interface
 * index is passed through the task argument; the interface may already
 * be detached, in which case nothing happens.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1862
/*
 * Map interface name to interface structure pointer.
 *
 * Returns a referenced ifnet (release with if_put()) or NULL.
 */
struct ifnet *
if_unit(const char *name)
{
	struct ifnet *ifp;

	KERNEL_ASSERT_LOCKED();

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0) {
			if_ref(ifp);
			return (ifp);
		}
	}

	return (NULL);
}
1882
/*
 * Map interface index to interface structure pointer.
 *
 * Lock-free lookup in the SMR-protected index map.  Returns a
 * referenced ifnet (release with if_put()) or NULL; index 0 is never
 * valid.
 */
struct ifnet *
if_get(unsigned int index)
{
	struct ifnet **if_map;
	struct ifnet *ifp = NULL;

	if (index == 0)
		return (NULL);

	/* The map pointer and its slots are only stable inside SMR. */
	smr_read_enter();
	if_map = SMR_PTR_GET(&if_idxmap.map);
	if (index < if_idxmap_limit(if_map)) {
		ifp = SMR_PTR_GET(&if_map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take a reference before leaving the SMR section. */
			if_ref(ifp);
		}
	}
	smr_read_leave();

	return (ifp);
}
1908
/*
 * Take an additional reference on an interface and return it, for
 * call chaining.
 */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1916
1917 void
if_put(struct ifnet * ifp)1918 if_put(struct ifnet *ifp)
1919 {
1920 if (ifp == NULL)
1921 return;
1922
1923 refcnt_rele_wake(&ifp->if_refcnt);
1924 }
1925
/*
 * Set the interface's link-layer (MAC) address.
 *
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom,
 * i.e. an Ethernet-like interface -- callers must guarantee this.
 * Returns EINVAL if no link-layer sockaddr is attached.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/* Update both the arpcom copy and the link-layer sockaddr. */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1937
/*
 * Create a new routing domain ``rdomain'' together with its loopback
 * interface lo<rdomain>.
 *
 * Returns EEXIST if the routing table is already populated, ENXIO if
 * the loopback could not be looked up, or an error from rtable_add()
 * or the cloner.  ``ifp'' is the interface being moved; an EEXIST
 * from cloning is tolerated when ifp itself is that loopback.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Bind the new table to the domain and move the loopback in. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1967
/*
 * Move an interface into routing domain ``rdomain''.
 *
 * The domain must already exist and be a real rdomain (not just a
 * table).  A loopback interface that anchors its own rdomain may not
 * be moved (EPERM).  All addresses and routes are torn down first;
 * the interface is cycled down/up if it was up.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The rdomain's own loopback must stay put. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);	/* driver handled it, success or failure */
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
2031
/*
 * Interface ioctls.
 *
 * Main dispatcher for SIOC* interface requests from userland.  Global
 * requests (create/destroy/list) are handled first; read-only requests
 * are funneled through ifioctl_get(); everything else is resolved to a
 * referenced ifnet, permission-checked, handled under the appropriate
 * locks, and finally reported to the routing socket if flags changed.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Requests that do not target one specific interface. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		error = if_clone_create(ifr->ifr_name, 0);
		KERNEL_UNLOCK();
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		error = if_clone_destroy(ifr->ifr_name);
		KERNEL_UNLOCK();
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		KERNEL_UNLOCK();
		return (error);
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		/* Read-only requests need no superuser check. */
		error = ifioctl_get(cmd, data);
		return (error);
	}

	KERNEL_LOCK();

	/* Everything below operates on a referenced interface. */
	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}
	/* Remember the flags so changes can be reported at the end. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		/* Turning on v6 autoconf attaches the v6 stack if needed. */
		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Interpose/remove the MPLS output hook on flag change. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			/* Requested WoL on hardware without the capability. */
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif
		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
		    ISSET(ifp->if_xflags, IFXF_LRO))
			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);

		/* Enabling autoconf implies bringing the interface up. */
		if (!ISSET(ifp->if_flags, IFF_UP) &&
		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
			ifr->ifr_flags = ifp->if_flags | IFF_UP;
			goto forceup;
		}

		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
forceup:
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);
		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
		if (error != 0) {
			/* Driver refused; roll the flags back. */
			ifp->if_flags = oif_flags;
			if (cmd == SIOCSIFXFLAGS)
				ifp->if_xflags = oif_xflags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Swap the label reference for the new one. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		/* Create the rdomain on demand, then move the interface. */
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;	/* driver need not care */
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;	/* driver need not care */
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Need a link-level sockaddr, a unicast Ethernet address. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSIFMEDIA:
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		/* net lock is not needed */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the protocol layer first shot at the request. */
		error = pru_control(so, cmd, data, ifp);
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing requests still need superuser. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
		/* if_up() and if_down() already sent an update, skip here */
		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
			rtm_ifchg(ifp);
	}

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
2491
/*
 * Handle read-only interface ioctls.  Requests that are not bound to a
 * single interface (SIOCGIFCONF, clone/group listings) are dispatched
 * first; everything else looks up the interface by name and copies the
 * requested attribute into the user-supplied ifreq.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;
	size_t bytesdone;

	/* global (non per-interface) requests */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		error = if_getgroupmembers(data);
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		error = if_getgrouplist(data);
		return (error);
	}

	/* take a reference on the interface named in the request */
	KERNEL_LOCK();
	ifp = if_unit(ifr->ifr_name);
	KERNEL_UNLOCK();

	if (ifp == NULL)
		return (ENXIO);

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is derived from the send queue state */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* internal-only bits are hidden from userland */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;

		/* snapshot the counters under the locks, copy out after */
		NET_LOCK_SHARED();
		KERNEL_LOCK();
		if_getdata(ifp, &ifdata);
		KERNEL_UNLOCK();
		NET_UNLOCK_SHARED();

		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR: {
		char ifdescrbuf[IFDESCRSIZE];
		/* copy to a local buffer so copyoutstr runs unlocked */
		KERNEL_LOCK();
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		KERNEL_UNLOCK();

		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;
	}
	case SIOCGIFRTLABEL: {
		char ifrtlabelbuf[RTLABEL_LEN];
		u_short rtlabelid = READ_ONCE(ifp->if_rtlabelid);

		if (rtlabelid && rtlabel_id2name(rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;
	}
	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* the caller routes only the commands above to us */
		panic("invalid ioctl %lu", cmd);
	}

	if_put(ifp);

	return (error);
}
2611
2612 static int
if_sffpage_check(const caddr_t data)2613 if_sffpage_check(const caddr_t data)
2614 {
2615 const struct if_sffpage *sff = (const struct if_sffpage *)data;
2616
2617 switch (sff->sff_addr) {
2618 case IFSFF_ADDR_EEPROM:
2619 case IFSFF_ADDR_DDM:
2620 break;
2621 default:
2622 return (EINVAL);
2623 }
2624
2625 return (0);
2626 }
2627
2628 int
if_txhprio_l2_check(int hdrprio)2629 if_txhprio_l2_check(int hdrprio)
2630 {
2631 switch (hdrprio) {
2632 case IF_HDRPRIO_PACKET:
2633 return (0);
2634 default:
2635 if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2636 return (0);
2637 break;
2638 }
2639
2640 return (EINVAL);
2641 }
2642
2643 int
if_txhprio_l3_check(int hdrprio)2644 if_txhprio_l3_check(int hdrprio)
2645 {
2646 switch (hdrprio) {
2647 case IF_HDRPRIO_PACKET:
2648 case IF_HDRPRIO_PAYLOAD:
2649 return (0);
2650 default:
2651 if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2652 return (0);
2653 break;
2654 }
2655
2656 return (EINVAL);
2657 }
2658
2659 int
if_rxhprio_l2_check(int hdrprio)2660 if_rxhprio_l2_check(int hdrprio)
2661 {
2662 switch (hdrprio) {
2663 case IF_HDRPRIO_PACKET:
2664 case IF_HDRPRIO_OUTER:
2665 return (0);
2666 default:
2667 if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2668 return (0);
2669 break;
2670 }
2671
2672 return (EINVAL);
2673 }
2674
2675 int
if_rxhprio_l3_check(int hdrprio)2676 if_rxhprio_l3_check(int hdrprio)
2677 {
2678 switch (hdrprio) {
2679 case IF_HDRPRIO_PACKET:
2680 case IF_HDRPRIO_PAYLOAD:
2681 case IF_HDRPRIO_OUTER:
2682 return (0);
2683 default:
2684 if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2685 return (0);
2686 break;
2687 }
2688
2689 return (EINVAL);
2690 }
2691
2692 /*
2693 * Return interface configuration
2694 * of system. List may be used
2695 * in later ioctl's (above) to get
2696 * other information.
2697 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
			struct sockaddr *sa;

			/* an address-less interface still takes one ifreq */
			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/*
					 * Oversized sockaddrs need extra
					 * room beyond the embedded one.
					 */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	/* fill the user buffer, stopping when space runs out */
	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* report the interface with a zeroed address */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					/* sockaddr fits inside the ifreq */
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * Oversized sockaddr: copy the name,
					 * then the full sockaddr in place,
					 * and advance past its true length.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* report how many bytes were actually used */
	ifc->ifc_len -= space;
	return (error);
}
2773
/*
 * Attach an optional counters block to the interface; drivers that call
 * this update stats via the counters API instead of if_data_counters.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2781
/*
 * Release the counters block allocated by if_counters_alloc() and
 * clear the pointer so a stale reference cannot be used.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2790
/*
 * Fill *data with a snapshot of the interface's attributes and traffic
 * statistics: the base if_data_counters, plus the optional counters
 * block (if allocated) and the per-queue input/output queue stats.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	data->ifi_type = ifp->if_type;
	data->ifi_addrlen = ifp->if_addrlen;
	data->ifi_hdrlen = ifp->if_hdrlen;
	data->ifi_link_state = ifp->if_link_state;
	data->ifi_mtu = ifp->if_mtu;
	data->ifi_metric = ifp->if_metric;
	data->ifi_baudrate = ifp->if_baudrate;
	data->ifi_capabilities = ifp->if_capabilities;
	data->ifi_rdomain = ifp->if_rdomain;
	data->ifi_lastchange = ifp->if_lastchange;

	/* base counters kept directly in the ifnet */
	data->ifi_ipackets = ifp->if_data_counters[ifc_ipackets];
	data->ifi_ierrors = ifp->if_data_counters[ifc_ierrors];
	data->ifi_opackets = ifp->if_data_counters[ifc_opackets];
	data->ifi_oerrors = ifp->if_data_counters[ifc_oerrors];
	data->ifi_collisions = ifp->if_data_counters[ifc_collisions];
	data->ifi_ibytes = ifp->if_data_counters[ifc_ibytes];
	data->ifi_obytes = ifp->if_data_counters[ifc_obytes];
	data->ifi_imcasts = ifp->if_data_counters[ifc_imcasts];
	data->ifi_omcasts = ifp->if_data_counters[ifc_omcasts];
	data->ifi_iqdrops = ifp->if_data_counters[ifc_iqdrops];
	data->ifi_oqdrops = ifp->if_data_counters[ifc_oqdrops];
	data->ifi_noproto = ifp->if_data_counters[ifc_noproto];

	/* add in the optional counters block, if the driver allocated one */
	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters),
		    NULL);

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* fold in per-queue transmit stats */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* fold in per-queue receive stats */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2852
2853 /*
2854 * Dummy functions replaced in ifnet during detach (if protocols decide to
2855 * fiddle with the if during detach.
2856 */
/* Replacement start routine for a detached interface: drop everything. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2862
/* Replacement ioctl routine for a detached interface: always fails. */
int
if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
{
	return ENODEV;
}
2868
/* Take a temporary reference on a group, keeping it alive off-list. */
static inline void
ifgroup_icref(struct ifg_group *ifg)
{
	refcnt_take(&ifg->ifg_tmprefcnt);
}
2874
/* Drop a temporary group reference; free the group on the last one. */
static inline void
ifgroup_icrele(struct ifg_group *ifg)
{
	if (refcnt_rele(&ifg->ifg_tmprefcnt) != 0)
		free(ifg, M_IFGROUP, sizeof(*ifg));
}
2881
2882 /*
2883 * Create interface group without members
2884 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg;

	/* M_NOWAIT: caller must cope with allocation failure */
	if ((ifg = malloc(sizeof(*ifg), M_IFGROUP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	/* ifg_refcnt counts member links; tmprefcnt guards the memory */
	ifg->ifg_refcnt = 1;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
	refcnt_init(&ifg->ifg_tmprefcnt);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2905
2906 /*
2907 * Add a group to an interface
2908 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;
	size_t namelen;

	/*
	 * Group names must be non-empty, fit in IFNAMSIZ and not end in
	 * a digit (a trailing digit would clash with interface names).
	 */
	namelen = strlen(groupname);
	if (namelen == 0 || namelen >= IFNAMSIZ ||
	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
		return (EINVAL);

	/* already a member of this group? */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* allocate both link structures up front so failure is clean */
	if ((ifgl = malloc(sizeof(*ifgl), M_IFGROUP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_IFGROUP, M_NOWAIT)) == NULL) {
		free(ifgl, M_IFGROUP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* find the existing group, or create it on first use */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL) {
		ifg = if_creategroup(groupname);
		if (ifg == NULL) {
			free(ifgl, M_IFGROUP, sizeof(*ifgl));
			free(ifgm, M_IFGROUP, sizeof(*ifgm));
			return (ENOMEM);
		}
	} else
		ifg->ifg_refcnt++;
	KASSERT(ifg->ifg_refcnt != 0);

	/* link interface and group to each other */
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname);
#endif

	return (0);
}
2961
2962 /*
2963 * Remove a group from an interface
2964 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	/* locate the interface's link to the named group */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* remove the matching member entry from the group side */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_IFGROUP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_delmember(groupname);
#endif

	/* last membership gone: unlink the group and drop its memory ref */
	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		ifgroup_icrele(ifgl->ifgl_group);
	}

	free(ifgl, M_IFGROUP, sizeof(*ifgl));

	return (0);
}
3005
3006 /*
3007 * Stores all groups from an interface in memory pointed
3008 * to by data
3009 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	TAILQ_HEAD(, ifg_group) ifg_tmplist =
	    TAILQ_HEAD_INITIALIZER(ifg_tmplist);
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	int len, error = 0;

	/* size query: report how much room the group list needs */
	if (ifgr->ifgr_len == 0) {
		NET_LOCK_SHARED();
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		NET_UNLOCK_SHARED();
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;

	rw_enter_write(&if_tmplist_lock);

	/*
	 * Take temporary refs on the groups under the net lock, then
	 * drop it so copyout below can run without holding it.
	 */
	NET_LOCK_SHARED();
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		ifgroup_icref(ifgl->ifgl_group);
		TAILQ_INSERT_TAIL(&ifg_tmplist, ifgl->ifgl_group, ifg_tmplist);
	}
	NET_UNLOCK_SHARED();

	TAILQ_FOREACH(ifg, &ifg_tmplist, ifg_tmplist) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}

	/* release the temporary refs taken above */
	while ((ifg = TAILQ_FIRST(&ifg_tmplist))){
		TAILQ_REMOVE(&ifg_tmplist, ifg, ifg_tmplist);
		ifgroup_icrele(ifg);
	}

	rw_exit_write(&if_tmplist_lock);

	return (error);
}
3065
3066 /*
3067 * Stores all members of a group in memory pointed to by data
3068 */
int
if_getgroupmembers(caddr_t data)
{
	TAILQ_HEAD(, ifnet) if_tmplist =
	    TAILQ_HEAD_INITIALIZER(if_tmplist);
	struct ifnet *ifp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error = 0;

	rw_enter_write(&if_tmplist_lock);
	NET_LOCK_SHARED();

	/* look up the requested group by name */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL) {
		error = ENOENT;
		goto unlock;
	}

	/* size query: report how much room the member list needs */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		goto unlock;
	}

	/*
	 * Reference the member interfaces under the net lock, then drop
	 * it so copyout below can run without holding it.
	 */
	TAILQ_FOREACH (ifgm, &ifg->ifg_members, ifgm_next) {
		if_ref(ifgm->ifgm_ifp);
		TAILQ_INSERT_TAIL(&if_tmplist, ifgm->ifgm_ifp, if_tmplist);
	}
	NET_UNLOCK_SHARED();

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;

	TAILQ_FOREACH (ifp, &if_tmplist, if_tmplist) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}

	/* drop the interface references taken above */
	while ((ifp = TAILQ_FIRST(&if_tmplist))) {
		TAILQ_REMOVE(&if_tmplist, ifp, if_tmplist);
		if_put(ifp);
	}
	rw_exit_write(&if_tmplist_lock);

	return (error);

unlock:
	NET_UNLOCK_SHARED();
	rw_exit_write(&if_tmplist_lock);

	return (error);
}
3136
/*
 * Report a group's attributes (currently only the carp demotion
 * counter) into the user-supplied ifgroupreq.
 */
int
if_getgroupattribs(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;

	return (0);
}
3153
/*
 * Adjust a group's carp demotion counter by the (possibly negative)
 * delta in the request and notify every member interface via its
 * ioctl routine.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	int demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* the resulting counter must stay within [0, 0xff] */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* let each member react to the attribute change */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
3180
3181 /*
3182 * Stores all groups in memory pointed to by data
3183 */
int
if_getgrouplist(caddr_t data)
{
	TAILQ_HEAD(, ifg_group) ifg_tmplist =
	    TAILQ_HEAD_INITIALIZER(ifg_tmplist);
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_req ifgrq, *ifgp;
	int len, error = 0;

	/* size query: report how much room the full group list needs */
	if (ifgr->ifgr_len == 0) {
		NET_LOCK_SHARED();
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		NET_UNLOCK_SHARED();
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;

	rw_enter_write(&if_tmplist_lock);

	/*
	 * Take temporary refs on all groups under the net lock, then
	 * drop it so copyout below can run without holding it.
	 */
	NET_LOCK_SHARED();
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		ifgroup_icref(ifg);
		TAILQ_INSERT_TAIL(&ifg_tmplist, ifg, ifg_tmplist);
	}
	NET_UNLOCK_SHARED();

	TAILQ_FOREACH(ifg, &ifg_tmplist, ifg_tmplist) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}

	/* release the temporary refs taken above */
	while ((ifg = TAILQ_FIRST(&ifg_tmplist))){
		TAILQ_REMOVE(&ifg_tmplist, ifg, ifg_tmplist);
		ifgroup_icrele(ifg);
	}

	rw_exit_write(&if_tmplist_lock);

	return (error);
}
3238
/*
 * Called on route changes: if a default route (destination and mask
 * both "any") was added or removed, rebuild the "egress" group so it
 * tracks the interfaces carrying default routes.
 */
void
if_group_routechange(const struct sockaddr *dst, const struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin_const(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin_const(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6_const(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6_const(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
3260
/*
 * Rebuild the IFG_EGRESS group from scratch: empty it, then re-add
 * every interface that currently carries an IPv4 or IPv6 default
 * route in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet *ifp;
	struct ifg_group *ifg;
	struct ifg_member *ifgm, *next;
	struct sockaddr_in sa_in;
#ifdef INET6
	struct sockaddr_in6 sa_in6;
#endif
	struct rtentry *rt;

	/* flush the current egress membership, if the group exists */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* walk all IPv4 default routes and re-add their interfaces */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* same for IPv6 default routes */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3310
3311 /*
3312 * Set/clear promiscuous mode on interface ifp based on the truth value
3313 * of pswitch. The calls are reference counted so that only the first
3314 * "on" request actually has an effect, as does the final "off" request.
3315 * Results are undefined if the "off" and "on" requests are not matched.
3316 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* remember old state so a failed driver ioctl can be rolled back */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* only the first "on" request changes the flag */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* only the last "off" request changes the flag */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* the driver only needs to know while the interface is up */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* driver refused: restore both flag and count */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
3351
3352 /* Set/clear LRO flag and restart interface if needed. */
3353 int
ifsetlro(struct ifnet * ifp,int on)3354 ifsetlro(struct ifnet *ifp, int on)
3355 {
3356 struct ifreq ifrq;
3357 int error = 0;
3358 int s = splnet();
3359 struct if_parent parent;
3360
3361 memset(&parent, 0, sizeof(parent));
3362 if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) {
3363 struct ifnet *ifp0 = if_unit(parent.ifp_parent);
3364
3365 if (ifp0 != NULL) {
3366 ifsetlro(ifp0, on);
3367 if_put(ifp0);
3368 }
3369 }
3370
3371 if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
3372 error = ENOTSUP;
3373 goto out;
3374 }
3375
3376 NET_ASSERT_LOCKED(); /* for ioctl */
3377 KERNEL_ASSERT_LOCKED(); /* for if_flags */
3378
3379 if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
3380 if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) {
3381 error = EBUSY;
3382 goto out;
3383 }
3384 SET(ifp->if_xflags, IFXF_LRO);
3385 } else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
3386 CLR(ifp->if_xflags, IFXF_LRO);
3387 else
3388 goto out;
3389
3390 /* restart interface */
3391 if (ISSET(ifp->if_flags, IFF_UP)) {
3392 /* go down for a moment... */
3393 CLR(ifp->if_flags, IFF_UP);
3394 ifrq.ifr_flags = ifp->if_flags;
3395 (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3396
3397 /* ... and up again */
3398 SET(ifp->if_flags, IFF_UP);
3399 ifrq.ifr_flags = ifp->if_flags;
3400 (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3401 }
3402 out:
3403 splx(s);
3404
3405 return error;
3406 }
3407
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3414
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	NET_ASSERT_LOCKED_EXCLUSIVE();
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3421
/*
 * Overwrite an address's broadcast sockaddr in place; both sockaddrs
 * must have the same length, as the storage is not reallocated.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3429
3430 #ifdef DDB
3431 /* debug function, can be called from ddb> */
/* Print every configured IPv4/IPv6 interface address to the console. */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			/* other address families are silently skipped */
			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3460 #endif /* DDB */
3461
/*
 * React to a link-layer address change: bounce the interface through
 * SIOCSIFFLAGS so the driver reprograms its hardware, and regenerate
 * the IPv6 link-local address (which embeds the lladdr) on hosts.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* bring the interface up so the driver picks up the new lladdr */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!i_am_router) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3509
/* Register a task to run when the interface's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3517
/* Unregister a previously added address-change task. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3525
/* Run all registered address-change hooks for the interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3531
/*
 * Initialize a receive ring accounting structure with low and high
 * watermarks; the current watermark starts at the low one and is
 * adjusted over time by if_rxr_get()/if_rxr_livelocked().
 */
void
if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
{
	extern int ticks;

	memset(rxr, 0, sizeof(*rxr));

	rxr->rxr_adjusted = ticks;
	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
	rxr->rxr_hwm = hwm;
}
3543
/*
 * Grow the current watermark by one (up to the high watermark) when
 * the ring has drained below the low watermark, and record when the
 * adjustment was made.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	/* still enough buffers alive: nothing to adjust */
	if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	rxr->rxr_adjusted = ticks;
}
3556
/*
 * Called when the input path is livelocked: shrink the current
 * watermark by one (down to the low watermark), rate limited to at
 * most one adjustment per tick.
 */
void
if_rxr_livelocked(struct if_rxring *rxr)
{
	extern int ticks;

	if (ticks - rxr->rxr_adjusted >= 1) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;

		rxr->rxr_adjusted = ticks;
	}
}
3569
/*
 * Ask for up to max receive buffer slots.  Returns how many the
 * caller may fill (possibly 0), bounded by the current watermark,
 * and opportunistically grows the watermark once per tick.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* ring already at (or above) its allowance */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3589
/*
 * Copy t ring-info entries from e out to the userland if_rxrinfo at
 * uifri, limited by the space userland advertised, and report the
 * total number of rings available in ifri_total.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* copy out no more entries than userland has room for */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	/* write back the header with the true total */
	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3612
/*
 * Convenience wrapper for drivers with a single receive ring: build
 * one if_rxring_info entry and hand it to if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3629
3630 /*
3631 * Network stack input queues.
3632 */
3633
/* Initialize a netisr input queue with a length limit and isr number. */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3640
/*
 * Enqueue one mbuf on a netisr queue.  On success schedule the
 * matching softint; on failure (queue full, mbuf dropped) signal
 * congestion.  Returns the mq_enqueue() result.
 */
int
niq_enqueue(struct niqueue *niq, struct mbuf *m)
{
	int rv;

	rv = mq_enqueue(&niq->ni_q, m);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}
3654
/*
 * Enqueue a whole mbuf list on a netisr queue.  Like niq_enqueue():
 * schedule the softint on success, signal congestion on failure.
 */
int
niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
{
	int rv;

	rv = mq_enlist(&niq->ni_q, ml);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}
3668
/* Panic on an address family the caller has no handler for. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3674
/*
 * Map an interface index to one of the softnet task queues, spreading
 * interfaces across at most min(NET_TASKQ, ncpus) queues.
 */
struct taskq *
net_tq(unsigned int ifindex)
{
	struct softnet *sn;
	static int nettaskqs;

	/* lazily computed on first use; ncpus is fixed by then */
	if (nettaskqs == 0)
		nettaskqs = min(NET_TASKQ, ncpus);

	sn = &softnets[ifindex % nettaskqs];

	return (sn->sn_taskq);
}
3688
/*
 * Wait until every softnet task queue has completed the work queued
 * before this call: queue a refcount-releasing task on each queue and
 * sleep until all of them have run.
 */
void
net_tq_barriers(const char *wmesg)
{
	struct task barriers[NET_TASKQ];
	struct refcnt r = REFCNT_INITIALIZER();
	int i;

	for (i = 0; i < nitems(barriers); i++) {
		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake, &r);
		refcnt_take(&r);
		task_add(softnets[i].sn_taskq, &barriers[i]);
	}

	/* sleeps until the last barrier task drops its reference */
	refcnt_finalize(&r, wmesg);
}
3704