1 /* $NetBSD: if.c,v 1.529 2023/02/24 11:02:45 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by William Studenmund and Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * Copyright (c) 1980, 1986, 1993
63 * The Regents of the University of California. All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 * notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 * notice, this list of conditions and the following disclaimer in the
72 * documentation and/or other materials provided with the distribution.
73 * 3. Neither the name of the University nor the names of its contributors
74 * may be used to endorse or promote products derived from this software
75 * without specific prior written permission.
76 *
77 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
78 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
79 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
80 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
81 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
82 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
83 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
84 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
85 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
86 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87 * SUCH DAMAGE.
88 *
89 * @(#)if.c 8.5 (Berkeley) 1/9/95
90 */
91
92 #include <sys/cdefs.h>
93 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.529 2023/02/24 11:02:45 riastradh Exp $");
94
95 #if defined(_KERNEL_OPT)
96 #include "opt_inet.h"
97 #include "opt_ipsec.h"
98 #include "opt_atalk.h"
99 #include "opt_wlan.h"
100 #include "opt_net_mpsafe.h"
101 #include "opt_mrouting.h"
102 #endif
103
104 #include <sys/param.h>
105 #include <sys/mbuf.h>
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/socket.h>
110 #include <sys/socketvar.h>
111 #include <sys/domain.h>
112 #include <sys/protosw.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/sysctl.h>
116 #include <sys/syslog.h>
117 #include <sys/kauth.h>
118 #include <sys/kmem.h>
119 #include <sys/xcall.h>
120 #include <sys/cpu.h>
121 #include <sys/intr.h>
122 #include <sys/module_hook.h>
123 #include <sys/compat_stub.h>
124 #include <sys/msan.h>
125 #include <sys/hook.h>
126
127 #include <net/if.h>
128 #include <net/if_dl.h>
129 #include <net/if_ether.h>
130 #include <net/if_media.h>
131 #include <net80211/ieee80211.h>
132 #include <net80211/ieee80211_ioctl.h>
133 #include <net/if_types.h>
134 #include <net/route.h>
135 #include <sys/module.h>
136 #ifdef NETATALK
137 #include <netatalk/at_extern.h>
138 #include <netatalk/at.h>
139 #endif
140 #include <net/pfil.h>
141 #include <netinet/in.h>
142 #include <netinet/in_var.h>
143 #include <netinet/ip_encap.h>
144 #include <net/bpf.h>
145
146 #ifdef INET6
147 #include <netinet6/in6_var.h>
148 #include <netinet6/nd6.h>
149 #endif
150
151 #include "ether.h"
152
153 #include "bridge.h"
154 #if NBRIDGE > 0
155 #include <net/if_bridgevar.h>
156 #endif
157
158 #include "carp.h"
159 #if NCARP > 0
160 #include <netinet/ip_carp.h>
161 #endif
162
163 #include <compat/sys/sockio.h>
164
/*
 * Allocation types defined by this file.  M_IFADDR is the type passed to
 * malloc()/free() for the link-level ifaddr and for the ifindex2ifnet
 * table further down in this file.
 */
165 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
166 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
167
168 /*
169 * XXX reusing (ifp)->if_snd->ifq_lock rather than having another spin mutex
170 * for each ifnet. It doesn't matter because:
171 * - if IFEF_MPSAFE is enabled, if_snd isn't used and lock contentions on
172 * ifq_lock don't happen
173 * - if IFEF_MPSAFE is disabled, there is no lock contention on ifq_lock
174 * because if_snd, if_link_state_change and if_link_state_change_process
175 * are all called with KERNEL_LOCK
176 */
/* Enter the interface's send-queue mutex (ifp->if_snd.ifq_lock). */
177 #define IF_LINK_STATE_CHANGE_LOCK(ifp) \
178 mutex_enter((ifp)->if_snd.ifq_lock)
/* Exit the mutex entered by IF_LINK_STATE_CHANGE_LOCK(). */
179 #define IF_LINK_STATE_CHANGE_UNLOCK(ifp) \
180 mutex_exit((ifp)->if_snd.ifq_lock)
181
182 /*
183 * Global list of interfaces.
184 */
185 /* DEPRECATED. Remove it once kvm(3) users disappeared */
186 struct ifnet_head ifnet_list;
187
/* Interface list; set up with PSLIST_INIT() in ifinit1(). */
188 struct pslist_head ifnet_pslist;
/*
 * Table indexed by interface index, holding the ifnet that owns each
 * index.  Allocated and grown on demand by if_getindex().
 */
189 static ifnet_t ** ifindex2ifnet = NULL;
/* Index at which if_getindex() starts looking for an unused slot. */
190 static u_int if_index = 1;
/*
 * Number of entries ifindex2ifnet is sized for.  Set to 8 by ifinit1()
 * and doubled by if_getindex() whenever an index does not fit.
 */
191 static size_t if_indexlim = 0;
/* Counter copied into if_index_gen, then incremented, by if_getindex(). */
192 static uint64_t index_gen;
193 /* Mutex to protect the above objects. */
194 kmutex_t ifnet_mtx __cacheline_aligned;
/* psref class "ifnet" (created in ifinit1()); used for ifp->if_psref. */
195 static struct psref_class *ifnet_psref_class __read_mostly;
/* pserialize instance created in ifinit1(). */
196 static pserialize_t ifnet_psz;
/* Workqueue "iflnkst"; its worker is if_link_state_change_work(). */
197 static struct workqueue *ifnet_link_state_wq __read_mostly;
198
/* Workqueue "ifwdog"; its worker is if_slowtimo_work(). */
199 static struct workqueue *if_slowtimo_wq __read_mostly;
200
/*
 * Initialized in ifinit1().
 * NOTE(review): presumably guards the interface cloner list -- the users
 * are outside this part of the file; confirm.
 */
201 static kmutex_t if_clone_mtx;
202
/* NOTE(review): presumably the loopback interface; it is assigned elsewhere. */
203 struct ifnet *lo0ifp;
/*
 * Default maximum send queue length; if_initialize() applies it when a
 * driver leaves if_snd.ifq_maxlen at 0.
 */
204 int ifqmaxlen = IFQ_MAXLEN;
205
/* psref class "ifa" (created in ifinit1()). */
206 struct psref_class *ifa_psref_class __read_mostly;
207
/* Forward declarations of file-local helpers. */
208 static int if_delroute_matcher(struct rtentry *, void *);
209
210 static bool if_is_unit(const char *);
211 static struct if_clone *if_clone_lookup(const char *, int *);
212
/*
 * List head for struct if_clone entries, plus an integer counter.
 * NOTE(review): presumably the counter tracks the number of list entries;
 * the code that maintains both is outside this part of the file.
 */
213 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
214 static int if_cloners_count;
215
216 /* Packet filtering hook for interfaces. */
217 pfil_head_t * if_pfil __read_mostly;
218
/* Handle returned by kauth_listen_scope() in ifinit(). */
219 static kauth_listener_t if_listener;
220
/* More forward declarations of file-local functions. */
221 static int doifioctl(struct socket *, u_long, void *, struct lwp *);
222 static void sysctl_sndq_setup(struct sysctllog **, const char *,
223 struct ifaltq *);
224 static void if_slowtimo_intr(void *);
225 static void if_slowtimo_work(struct work *, void *);
226 static int sysctl_if_watchdog(SYSCTLFN_PROTO);
227 static void sysctl_watchdog_setup(struct ifnet *);
228 static void if_attachdomain1(struct ifnet *);
229 static int ifconf(u_long, void *);
230 static int if_transmit(struct ifnet *, struct mbuf *);
231 static int if_clone_create(const char *);
232 static int if_clone_destroy(const char *);
233 static void if_link_state_change_work(struct work *, void *);
234 static void if_up_locked(struct ifnet *);
235 static void _if_down(struct ifnet *);
236 static void if_down_deactivated(struct ifnet *);
237
/*
 * State of one softint-based input queue set; created by
 * if_percpuq_create() and released by if_percpuq_destroy().
 */
238 struct if_percpuq {
/* Interface whose _if_input routine receives the dequeued packets. */
239 struct ifnet *ipq_ifp;
/* Softint handle from softint_establish(); runs if_percpuq_softint(). */
240 void *ipq_si;
241 struct percpu *ipq_ifqs; /* struct ifqueue */
242 };
243
/* Forward declarations for the if_percpuq helpers defined later. */
244 static struct mbuf *if_percpuq_dequeue(struct if_percpuq *);
245
246 static void if_percpuq_drops(void *, void *, struct cpu_info *);
247 static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO);
248 static void sysctl_percpuq_setup(struct sysctllog **, const char *,
249 struct if_percpuq *);
250
/*
 * State of the deferred if_start mechanism for one interface; allocated by
 * if_deferred_start_init() and stored in ifp->if_deferred_start.
 */
251 struct if_deferred_start {
/* Interface handed to ids_if_start when the softint runs. */
252 struct ifnet *ids_ifp;
/* Start callback: the driver's function or if_deferred_start_common(). */
253 void (*ids_if_start)(struct ifnet *);
/* Softint handle from softint_establish(). */
254 void *ids_si;
255 };
256
/* Forward declarations for the deferred if_start helpers. */
257 static void if_deferred_start_softint(void *);
258 static void if_deferred_start_common(struct ifnet *);
259 static void if_deferred_start_destroy(struct ifnet *);
260
/*
 * Per-interface slow timeout (watchdog) state; allocated in if_register()
 * for interfaces that provide an if_slowtimo routine.
 */
261 struct if_slowtimo_data {
/* Mutex initialized at IPL_SOFTCLOCK in if_register(). */
262 kmutex_t isd_lock;
/* Callout whose function is if_slowtimo_intr(). */
263 struct callout isd_ch;
/* NOTE(review): presumably the work item queued on if_slowtimo_wq; confirm. */
264 struct work isd_work;
/* Interface this state belongs to. */
265 struct ifnet *isd_ifp;
/*
 * NOTE(review): the three flags below start out false (kmem_zalloc) and
 * are managed by code outside this part of the file.
 */
266 bool isd_queued;
267 bool isd_dying;
268 bool isd_trigger;
269 };
270
271 /*
272 * Hook for if_vlan - needed by if_agr
273 */
274 struct if_vlan_vlan_input_hook_t if_vlan_vlan_input_hook;
275
/* Forward declaration; called with a NULL log pointer from ifinit_post(). */
276 static void if_sysctl_setup(struct sysctllog **);
277
278 static int
if_listener_cb(kauth_cred_t cred,kauth_action_t action,void * cookie,void * arg0,void * arg1,void * arg2,void * arg3)279 if_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
280 void *arg0, void *arg1, void *arg2, void *arg3)
281 {
282 int result;
283 enum kauth_network_req req;
284
285 result = KAUTH_RESULT_DEFER;
286 req = (enum kauth_network_req)(uintptr_t)arg1;
287
288 if (action != KAUTH_NETWORK_INTERFACE)
289 return result;
290
291 if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) ||
292 (req == KAUTH_REQ_NETWORK_INTERFACE_SET))
293 result = KAUTH_RESULT_ALLOW;
294
295 return result;
296 }
297
298 /*
299 * Network interface utility routines.
300 *
301 * Routines with ifa_ifwith* names take sockaddr *'s as
302 * parameters.
303 */
304 void
ifinit(void)305 ifinit(void)
306 {
307
308 #if (defined(INET) || defined(INET6))
309 encapinit();
310 #endif
311
312 if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
313 if_listener_cb, NULL);
314
315 /* interfaces are available, inform socket code */
316 ifioctl = doifioctl;
317 }
318
319 /*
320 * XXX Initialization before configure().
321 * XXX hack to get pfil_add_hook working in autoconf.
322 */
323 void
ifinit1(void)324 ifinit1(void)
325 {
326 int error __diagused;
327
328 #ifdef NET_MPSAFE
329 printf("NET_MPSAFE enabled\n");
330 #endif
331
332 mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE);
333
334 TAILQ_INIT(&ifnet_list);
335 mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE);
336 ifnet_psz = pserialize_create();
337 ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET);
338 ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET);
339 error = workqueue_create(&ifnet_link_state_wq, "iflnkst",
340 if_link_state_change_work, NULL, PRI_SOFTNET, IPL_NET,
341 WQ_MPSAFE);
342 KASSERT(error == 0);
343 PSLIST_INIT(&ifnet_pslist);
344
345 error = workqueue_create(&if_slowtimo_wq, "ifwdog",
346 if_slowtimo_work, NULL, PRI_SOFTNET, IPL_SOFTCLOCK, WQ_MPSAFE);
347 KASSERTMSG(error == 0, "error=%d", error);
348
349 if_indexlim = 8;
350
351 if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL);
352 KASSERT(if_pfil != NULL);
353
354 #if NETHER > 0 || defined(NETATALK) || defined(WLAN)
355 etherinit();
356 #endif
357 }
358
359 /* XXX must be after domaininit() */
360 void
ifinit_post(void)361 ifinit_post(void)
362 {
363
364 if_sysctl_setup(NULL);
365 }
366
367 ifnet_t *
if_alloc(u_char type)368 if_alloc(u_char type)
369 {
370
371 return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP);
372 }
373
374 void
if_free(ifnet_t * ifp)375 if_free(ifnet_t *ifp)
376 {
377
378 kmem_free(ifp, sizeof(ifnet_t));
379 }
380
381 void
if_initname(struct ifnet * ifp,const char * name,int unit)382 if_initname(struct ifnet *ifp, const char *name, int unit)
383 {
384
385 (void)snprintf(ifp->if_xname, sizeof(ifp->if_xname),
386 "%s%d", name, unit);
387 }
388
389 /*
390 * Null routines used while an interface is going away. These routines
391 * just return an error.
392 */
393
/*
 * Null output routine: fails with ENXIO and leaves its arguments untouched.
 */
int
if_nulloutput(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *so, const struct rtentry *rt)
{
	return ENXIO;
}
401
/*
 * Null input routine: does nothing with the interface or the mbuf.
 */
void
if_nullinput(struct ifnet *ifp, struct mbuf *m)
{
	/* Intentionally empty. */
}
408
/*
 * Null start routine: does nothing.
 */
void
if_nullstart(struct ifnet *ifp)
{
	/* Intentionally empty. */
}
415
/*
 * Null transmit routine: frees the mbuf chain and fails with ENXIO.
 */
int
if_nulltransmit(struct ifnet *ifp, struct mbuf *m)
{
	const int error = ENXIO;

	m_freem(m);
	return error;
}
423
424 int
if_nullioctl(struct ifnet * ifp,u_long cmd,void * data)425 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data)
426 {
427
428 return ENXIO;
429 }
430
/*
 * Null init routine: always fails with ENXIO.
 */
int
if_nullinit(struct ifnet *ifp)
{
	return ENXIO;
}
437
/*
 * Null stop routine: does nothing.  if_register() installs it when a
 * driver supplies no if_stop of its own.
 */
void
if_nullstop(struct ifnet *ifp, int disable)
{
	/* Intentionally empty. */
}
444
/*
 * Null slow timeout routine: does nothing.
 */
void
if_nullslowtimo(struct ifnet *ifp)
{
	/* Intentionally empty. */
}
451
/*
 * Null drain routine: does nothing.
 */
void
if_nulldrain(struct ifnet *ifp)
{
	/* Intentionally empty. */
}
458
459 void
if_set_sadl(struct ifnet * ifp,const void * lla,u_char addrlen,bool factory)460 if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory)
461 {
462 struct ifaddr *ifa;
463 struct sockaddr_dl *sdl;
464
465 ifp->if_addrlen = addrlen;
466 if_alloc_sadl(ifp);
467 ifa = ifp->if_dl;
468 sdl = satosdl(ifa->ifa_addr);
469
470 (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen);
471 if (factory) {
472 KASSERT(ifp->if_hwdl == NULL);
473 ifp->if_hwdl = ifp->if_dl;
474 ifaref(ifp->if_hwdl);
475 }
476 /* TBD routing socket */
477 }
478
479 struct ifaddr *
if_dl_create(const struct ifnet * ifp,const struct sockaddr_dl ** sdlp)480 if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp)
481 {
482 unsigned socksize, ifasize;
483 int addrlen, namelen;
484 struct sockaddr_dl *mask, *sdl;
485 struct ifaddr *ifa;
486
487 namelen = strlen(ifp->if_xname);
488 addrlen = ifp->if_addrlen;
489 socksize = roundup(sockaddr_dl_measure(namelen, addrlen),
490 sizeof(long));
491 ifasize = sizeof(*ifa) + 2 * socksize;
492 ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
493
494 sdl = (struct sockaddr_dl *)(ifa + 1);
495 mask = (struct sockaddr_dl *)(socksize + (char *)sdl);
496
497 sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type,
498 ifp->if_xname, namelen, NULL, addrlen);
499 mask->sdl_family = AF_LINK;
500 mask->sdl_len = sockaddr_dl_measure(namelen, 0);
501 memset(&mask->sdl_data[0], 0xff, namelen);
502 ifa->ifa_rtrequest = link_rtrequest;
503 ifa->ifa_addr = (struct sockaddr *)sdl;
504 ifa->ifa_netmask = (struct sockaddr *)mask;
505 ifa_psref_init(ifa);
506
507 *sdlp = sdl;
508
509 return ifa;
510 }
511
512 static void
if_sadl_setrefs(struct ifnet * ifp,struct ifaddr * ifa)513 if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa)
514 {
515 const struct sockaddr_dl *sdl;
516
517 ifp->if_dl = ifa;
518 ifaref(ifa);
519 sdl = satosdl(ifa->ifa_addr);
520 ifp->if_sadl = sdl;
521 }
522
523 /*
524 * Allocate the link level name for the specified interface. This
525 * is an attachment helper. It must be called after ifp->if_addrlen
526 * is initialized, which may not be the case when if_attach() is
527 * called.
528 */
529 void
if_alloc_sadl(struct ifnet * ifp)530 if_alloc_sadl(struct ifnet *ifp)
531 {
532 struct ifaddr *ifa;
533 const struct sockaddr_dl *sdl;
534
535 /*
536 * If the interface already has a link name, release it
537 * now. This is useful for interfaces that can change
538 * link types, and thus switch link names often.
539 */
540 if (ifp->if_sadl != NULL)
541 if_free_sadl(ifp, 0);
542
543 ifa = if_dl_create(ifp, &sdl);
544
545 ifa_insert(ifp, ifa);
546 if_sadl_setrefs(ifp, ifa);
547 }
548
549 static void
if_deactivate_sadl(struct ifnet * ifp)550 if_deactivate_sadl(struct ifnet *ifp)
551 {
552 struct ifaddr *ifa;
553
554 KASSERT(ifp->if_dl != NULL);
555
556 ifa = ifp->if_dl;
557
558 ifp->if_sadl = NULL;
559
560 ifp->if_dl = NULL;
561 ifafree(ifa);
562 }
563
564 static void
if_replace_sadl(struct ifnet * ifp,struct ifaddr * ifa)565 if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa)
566 {
567 struct ifaddr *old;
568
569 KASSERT(ifp->if_dl != NULL);
570
571 old = ifp->if_dl;
572
573 ifaref(ifa);
574 /* XXX Update if_dl and if_sadl atomically */
575 ifp->if_dl = ifa;
576 ifp->if_sadl = satosdl(ifa->ifa_addr);
577
578 ifafree(old);
579 }
580
581 void
if_activate_sadl(struct ifnet * ifp,struct ifaddr * ifa0,const struct sockaddr_dl * sdl)582 if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0,
583 const struct sockaddr_dl *sdl)
584 {
585 struct ifaddr *ifa;
586 const int bound = curlwp_bind();
587
588 KASSERT(ifa_held(ifa0));
589
590 const int s = splsoftnet();
591
592 if_replace_sadl(ifp, ifa0);
593
594 int ss = pserialize_read_enter();
595 IFADDR_READER_FOREACH(ifa, ifp) {
596 struct psref psref;
597 ifa_acquire(ifa, &psref);
598 pserialize_read_exit(ss);
599
600 rtinit(ifa, RTM_LLINFO_UPD, 0);
601
602 ss = pserialize_read_enter();
603 ifa_release(ifa, &psref);
604 }
605 pserialize_read_exit(ss);
606
607 splx(s);
608 curlwp_bindx(bound);
609 }
610
611 /*
612 * Free the link level name for the specified interface. This is
613 * a detach helper. This is called from if_detach().
614 */
615 void
if_free_sadl(struct ifnet * ifp,int factory)616 if_free_sadl(struct ifnet *ifp, int factory)
617 {
618 struct ifaddr *ifa;
619
620 if (factory && ifp->if_hwdl != NULL) {
621 ifa = ifp->if_hwdl;
622 ifp->if_hwdl = NULL;
623 ifafree(ifa);
624 }
625
626 ifa = ifp->if_dl;
627 if (ifa == NULL) {
628 KASSERT(ifp->if_sadl == NULL);
629 return;
630 }
631
632 KASSERT(ifp->if_sadl != NULL);
633
634 const int s = splsoftnet();
635 KASSERT(ifa->ifa_addr->sa_family == AF_LINK);
636 ifa_remove(ifp, ifa);
637 if_deactivate_sadl(ifp);
638 splx(s);
639 }
640
641 static void
if_getindex(ifnet_t * ifp)642 if_getindex(ifnet_t *ifp)
643 {
644 bool hitlimit = false;
645 char xnamebuf[HOOKNAMSIZ];
646
647 ifp->if_index_gen = index_gen++;
648 snprintf(xnamebuf, sizeof(xnamebuf), "%s-lshk", ifp->if_xname);
649 ifp->if_linkstate_hooks = simplehook_create(IPL_NET,
650 xnamebuf);
651
652 ifp->if_index = if_index;
653 if (ifindex2ifnet == NULL) {
654 if_index++;
655 goto skip;
656 }
657 while (if_byindex(ifp->if_index)) {
658 /*
659 * If we hit USHRT_MAX, we skip back to 0 since
660 * there are a number of places where the value
661 * of if_index or if_index itself is compared
662 * to or stored in an unsigned short. By
663 * jumping back, we won't botch those assignments
664 * or comparisons.
665 */
666 if (++if_index == 0) {
667 if_index = 1;
668 } else if (if_index == USHRT_MAX) {
669 /*
670 * However, if we have to jump back to
671 * zero *twice* without finding an empty
672 * slot in ifindex2ifnet[], then there
673 * there are too many (>65535) interfaces.
674 */
675 if (hitlimit)
676 panic("too many interfaces");
677 hitlimit = true;
678 if_index = 1;
679 }
680 ifp->if_index = if_index;
681 }
682 skip:
683 /*
684 * ifindex2ifnet is indexed by if_index. Since if_index will
685 * grow dynamically, it should grow too.
686 */
687 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
688 size_t m, n, oldlim;
689 void *q;
690
691 oldlim = if_indexlim;
692 while (ifp->if_index >= if_indexlim)
693 if_indexlim <<= 1;
694
695 /* grow ifindex2ifnet */
696 m = oldlim * sizeof(struct ifnet *);
697 n = if_indexlim * sizeof(struct ifnet *);
698 q = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
699 if (ifindex2ifnet != NULL) {
700 memcpy(q, ifindex2ifnet, m);
701 free(ifindex2ifnet, M_IFADDR);
702 }
703 ifindex2ifnet = (struct ifnet **)q;
704 }
705 ifindex2ifnet[ifp->if_index] = ifp;
706 }
707
708 /*
709 * Initialize an interface and assign an index for it.
710 *
711 * It must be called prior to a device specific attach routine
712 * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl,
713 * and be followed by if_register:
714 *
715 * if_initialize(ifp);
716 * ether_ifattach(ifp, enaddr);
717 * if_register(ifp);
718 */
719 void
if_initialize(ifnet_t * ifp)720 if_initialize(ifnet_t *ifp)
721 {
722
723 KASSERT(if_indexlim > 0);
724 TAILQ_INIT(&ifp->if_addrlist);
725
726 /*
727 * Link level name is allocated later by a separate call to
728 * if_alloc_sadl().
729 */
730
731 if (ifp->if_snd.ifq_maxlen == 0)
732 ifp->if_snd.ifq_maxlen = ifqmaxlen;
733
734 ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
735
736 ifp->if_link_state = LINK_STATE_UNKNOWN;
737 ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
738 ifp->if_link_scheduled = false;
739
740 ifp->if_capenable = 0;
741 ifp->if_csum_flags_tx = 0;
742 ifp->if_csum_flags_rx = 0;
743
744 #ifdef ALTQ
745 ifp->if_snd.altq_type = 0;
746 ifp->if_snd.altq_disc = NULL;
747 ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
748 ifp->if_snd.altq_tbr = NULL;
749 ifp->if_snd.altq_ifp = ifp;
750 #endif
751
752 IFQ_LOCK_INIT(&ifp->if_snd);
753
754 ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp);
755 pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp);
756
757 IF_AFDATA_LOCK_INIT(ifp);
758
759 PSLIST_ENTRY_INIT(ifp, if_pslist_entry);
760 PSLIST_INIT(&ifp->if_addr_pslist);
761 psref_target_init(&ifp->if_psref, ifnet_psref_class);
762 ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
763 LIST_INIT(&ifp->if_multiaddrs);
764 if_stats_init(ifp);
765
766 IFNET_GLOBAL_LOCK();
767 if_getindex(ifp);
768 IFNET_GLOBAL_UNLOCK();
769 }
770
771 /*
772 * Register an interface to the list of "active" interfaces.
773 */
774 void
if_register(ifnet_t * ifp)775 if_register(ifnet_t *ifp)
776 {
777 /*
778 * If the driver has not supplied its own if_ioctl or if_stop,
779 * then supply the default.
780 */
781 if (ifp->if_ioctl == NULL)
782 ifp->if_ioctl = ifioctl_common;
783 if (ifp->if_stop == NULL)
784 ifp->if_stop = if_nullstop;
785
786 sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd);
787
788 if (!STAILQ_EMPTY(&domains))
789 if_attachdomain1(ifp);
790
791 /* Announce the interface. */
792 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
793
794 if (ifp->if_slowtimo != NULL) {
795 struct if_slowtimo_data *isd;
796
797 isd = kmem_zalloc(sizeof(*isd), KM_SLEEP);
798 mutex_init(&isd->isd_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK);
799 callout_init(&isd->isd_ch, CALLOUT_MPSAFE);
800 callout_setfunc(&isd->isd_ch, if_slowtimo_intr, ifp);
801 isd->isd_ifp = ifp;
802
803 ifp->if_slowtimo_data = isd;
804
805 if_slowtimo_intr(ifp);
806
807 sysctl_watchdog_setup(ifp);
808 }
809
810 if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
811 ifp->if_transmit = if_transmit;
812
813 IFNET_GLOBAL_LOCK();
814 TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list);
815 IFNET_WRITER_INSERT_TAIL(ifp);
816 IFNET_GLOBAL_UNLOCK();
817 }
818
819 /*
820 * The if_percpuq framework
821 *
822 * It allows network device drivers to execute the network stack
823 * in softint (so called softint-based if_input). It utilizes
824 * softint and percpu ifqueue. It doesn't distribute any packets
825 * between CPUs, unlike pktqueue(9).
826 *
827 * Currently we support two options for device drivers to apply the framework:
828 * - Use it implicitly with less changes
829 * - If you use if_attach in driver's _attach function and if_input in
830 * driver's Rx interrupt handler, a packet is queued and a softint handles
831 * the packet implicitly
832 * - Use it explicitly in each driver (recommended)
833 * - You can use if_percpuq_* directly in your driver
834 * - In this case, you need to allocate struct if_percpuq in driver's softc
835 * - See wm(4) as a reference implementation
836 */
837
838 static void
if_percpuq_softint(void * arg)839 if_percpuq_softint(void *arg)
840 {
841 struct if_percpuq *ipq = arg;
842 struct ifnet *ifp = ipq->ipq_ifp;
843 struct mbuf *m;
844
845 while ((m = if_percpuq_dequeue(ipq)) != NULL) {
846 if_statinc(ifp, if_ipackets);
847 bpf_mtap(ifp, m, BPF_D_IN);
848
849 ifp->_if_input(ifp, m);
850 }
851 }
852
853 static void
if_percpuq_init_ifq(void * p,void * arg __unused,struct cpu_info * ci __unused)854 if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
855 {
856 struct ifqueue *const ifq = p;
857
858 memset(ifq, 0, sizeof(*ifq));
859 ifq->ifq_maxlen = IFQ_MAXLEN;
860 }
861
862 struct if_percpuq *
if_percpuq_create(struct ifnet * ifp)863 if_percpuq_create(struct ifnet *ifp)
864 {
865 struct if_percpuq *ipq;
866 u_int flags = SOFTINT_NET;
867
868 flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
869
870 ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP);
871 ipq->ipq_ifp = ifp;
872 ipq->ipq_si = softint_establish(flags, if_percpuq_softint, ipq);
873 ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue));
874 percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL);
875
876 sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq);
877
878 return ipq;
879 }
880
881 static struct mbuf *
if_percpuq_dequeue(struct if_percpuq * ipq)882 if_percpuq_dequeue(struct if_percpuq *ipq)
883 {
884 struct mbuf *m;
885 struct ifqueue *ifq;
886
887 const int s = splnet();
888 ifq = percpu_getref(ipq->ipq_ifqs);
889 IF_DEQUEUE(ifq, m);
890 percpu_putref(ipq->ipq_ifqs);
891 splx(s);
892
893 return m;
894 }
895
896 static void
if_percpuq_purge_ifq(void * p,void * arg __unused,struct cpu_info * ci __unused)897 if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
898 {
899 struct ifqueue *const ifq = p;
900
901 IF_PURGE(ifq);
902 }
903
904 void
if_percpuq_destroy(struct if_percpuq * ipq)905 if_percpuq_destroy(struct if_percpuq *ipq)
906 {
907
908 /* if_detach may already destroy it */
909 if (ipq == NULL)
910 return;
911
912 softint_disestablish(ipq->ipq_si);
913 percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL);
914 percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue));
915 kmem_free(ipq, sizeof(*ipq));
916 }
917
918 void
if_percpuq_enqueue(struct if_percpuq * ipq,struct mbuf * m)919 if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m)
920 {
921 struct ifqueue *ifq;
922
923 KASSERT(ipq != NULL);
924
925 const int s = splnet();
926 ifq = percpu_getref(ipq->ipq_ifqs);
927 if (IF_QFULL(ifq)) {
928 IF_DROP(ifq);
929 percpu_putref(ipq->ipq_ifqs);
930 m_freem(m);
931 goto out;
932 }
933 IF_ENQUEUE(ifq, m);
934 percpu_putref(ipq->ipq_ifqs);
935
936 softint_schedule(ipq->ipq_si);
937 out:
938 splx(s);
939 }
940
941 static void
if_percpuq_drops(void * p,void * arg,struct cpu_info * ci __unused)942 if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused)
943 {
944 struct ifqueue *const ifq = p;
945 uint64_t *sum = arg;
946
947 *sum += ifq->ifq_drops;
948 }
949
950 static int
sysctl_percpuq_drops_handler(SYSCTLFN_ARGS)951 sysctl_percpuq_drops_handler(SYSCTLFN_ARGS)
952 {
953 struct sysctlnode node;
954 struct if_percpuq *ipq;
955 uint64_t sum = 0;
956 int error;
957
958 node = *rnode;
959 ipq = node.sysctl_data;
960
961 percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum);
962
963 node.sysctl_data = ∑
964 error = sysctl_lookup(SYSCTLFN_CALL(&node));
965 if (error != 0 || newp == NULL)
966 return error;
967
968 return 0;
969 }
970
971 static void
sysctl_percpuq_setup(struct sysctllog ** clog,const char * ifname,struct if_percpuq * ipq)972 sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname,
973 struct if_percpuq *ipq)
974 {
975 const struct sysctlnode *cnode, *rnode;
976
977 if (sysctl_createv(clog, 0, NULL, &rnode,
978 CTLFLAG_PERMANENT,
979 CTLTYPE_NODE, "interfaces",
980 SYSCTL_DESCR("Per-interface controls"),
981 NULL, 0, NULL, 0,
982 CTL_NET, CTL_CREATE, CTL_EOL) != 0)
983 goto bad;
984
985 if (sysctl_createv(clog, 0, &rnode, &rnode,
986 CTLFLAG_PERMANENT,
987 CTLTYPE_NODE, ifname,
988 SYSCTL_DESCR("Interface controls"),
989 NULL, 0, NULL, 0,
990 CTL_CREATE, CTL_EOL) != 0)
991 goto bad;
992
993 if (sysctl_createv(clog, 0, &rnode, &rnode,
994 CTLFLAG_PERMANENT,
995 CTLTYPE_NODE, "rcvq",
996 SYSCTL_DESCR("Interface input queue controls"),
997 NULL, 0, NULL, 0,
998 CTL_CREATE, CTL_EOL) != 0)
999 goto bad;
1000
1001 #ifdef NOTYET
1002 /* XXX Should show each per-CPU queue length? */
1003 if (sysctl_createv(clog, 0, &rnode, &rnode,
1004 CTLFLAG_PERMANENT,
1005 CTLTYPE_INT, "len",
1006 SYSCTL_DESCR("Current input queue length"),
1007 sysctl_percpuq_len, 0, NULL, 0,
1008 CTL_CREATE, CTL_EOL) != 0)
1009 goto bad;
1010
1011 if (sysctl_createv(clog, 0, &rnode, &cnode,
1012 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1013 CTLTYPE_INT, "maxlen",
1014 SYSCTL_DESCR("Maximum allowed input queue length"),
1015 sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0,
1016 CTL_CREATE, CTL_EOL) != 0)
1017 goto bad;
1018 #endif
1019
1020 if (sysctl_createv(clog, 0, &rnode, &cnode,
1021 CTLFLAG_PERMANENT,
1022 CTLTYPE_QUAD, "drops",
1023 SYSCTL_DESCR("Total packets dropped due to full input queue"),
1024 sysctl_percpuq_drops_handler, 0, (void *)ipq, 0,
1025 CTL_CREATE, CTL_EOL) != 0)
1026 goto bad;
1027
1028 return;
1029 bad:
1030 printf("%s: could not attach sysctl nodes\n", ifname);
1031 return;
1032 }
1033
1034 /*
1035 * The deferred if_start framework
1036 *
1037 * The common APIs to defer if_start to softint when if_start is requested
1038 * from a device driver running in hardware interrupt context.
1039 */
1040 /*
1041 * Call ifp->if_start (or equivalent) in a dedicated softint for
1042 * deferred if_start.
1043 */
1044 static void
if_deferred_start_softint(void * arg)1045 if_deferred_start_softint(void *arg)
1046 {
1047 struct if_deferred_start *ids = arg;
1048 struct ifnet *ifp = ids->ids_ifp;
1049
1050 ids->ids_if_start(ifp);
1051 }
1052
/*
 * The default callback function for deferred if_start: call
 * if_start_lock() on the interface at splnet.
 */
static void
if_deferred_start_common(struct ifnet *ifp)
{
	const int spl = splnet();

	if_start_lock(ifp);

	splx(spl);
}
1063
1064 static inline bool
if_snd_is_used(struct ifnet * ifp)1065 if_snd_is_used(struct ifnet *ifp)
1066 {
1067
1068 return ALTQ_IS_ENABLED(&ifp->if_snd) ||
1069 ifp->if_transmit == if_transmit ||
1070 ifp->if_transmit == NULL ||
1071 ifp->if_transmit == if_nulltransmit;
1072 }
1073
1074 /*
1075 * Schedule deferred if_start.
1076 */
1077 void
if_schedule_deferred_start(struct ifnet * ifp)1078 if_schedule_deferred_start(struct ifnet *ifp)
1079 {
1080
1081 KASSERT(ifp->if_deferred_start != NULL);
1082
1083 if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd))
1084 return;
1085
1086 softint_schedule(ifp->if_deferred_start->ids_si);
1087 }
1088
1089 /*
1090 * Create an instance of deferred if_start. A driver should call the function
1091 * only if the driver needs deferred if_start. Drivers can setup their own
1092 * deferred if_start function via 2nd argument.
1093 */
1094 void
if_deferred_start_init(struct ifnet * ifp,void (* func)(struct ifnet *))1095 if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *))
1096 {
1097 struct if_deferred_start *ids;
1098 u_int flags = SOFTINT_NET;
1099
1100 flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
1101
1102 ids = kmem_zalloc(sizeof(*ids), KM_SLEEP);
1103 ids->ids_ifp = ifp;
1104 ids->ids_si = softint_establish(flags, if_deferred_start_softint, ids);
1105 if (func != NULL)
1106 ids->ids_if_start = func;
1107 else
1108 ids->ids_if_start = if_deferred_start_common;
1109
1110 ifp->if_deferred_start = ids;
1111 }
1112
1113 static void
if_deferred_start_destroy(struct ifnet * ifp)1114 if_deferred_start_destroy(struct ifnet *ifp)
1115 {
1116
1117 if (ifp->if_deferred_start == NULL)
1118 return;
1119
1120 softint_disestablish(ifp->if_deferred_start->ids_si);
1121 kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start));
1122 ifp->if_deferred_start = NULL;
1123 }
1124
1125 /*
1126 * The common interface input routine that is called by device drivers,
1127 * which should be used only when the driver's rx handler already runs
1128 * in softint.
1129 */
1130 void
if_input(struct ifnet * ifp,struct mbuf * m)1131 if_input(struct ifnet *ifp, struct mbuf *m)
1132 {
1133
1134 KASSERT(ifp->if_percpuq == NULL);
1135 KASSERT(!cpu_intr_p());
1136
1137 if_statinc(ifp, if_ipackets);
1138 bpf_mtap(ifp, m, BPF_D_IN);
1139
1140 ifp->_if_input(ifp, m);
1141 }
1142
/*
 * DEPRECATED.  Use if_initialize and if_register instead.
 * See the above comment of if_initialize.
 *
 * Note that it implicitly enables if_percpuq to make drivers easy to
 * migrate softint-based if_input without much changes.  If you don't
 * want to enable it, use if_initialize instead.
 *
 * The sequence is: if_initialize(), create the per-CPU input queue,
 * then if_register().
 */
void
if_attach(ifnet_t *ifp)
{

	if_initialize(ifp);
	ifp->if_percpuq = if_percpuq_create(ifp);
	if_register(ifp);
}
1159
/*
 * Call if_attachdomain1() on every interface on the ifnet reader list.
 *
 * The list is walked inside a pserialize read section; for each
 * interface a psref is taken so that the read section can be left
 * while if_attachdomain1() runs, and re-entered before moving on.
 * The LWP is bound (curlwp_bind) for the duration of the walk.
 */
void
if_attachdomain(void)
{
	struct ifnet *ifp;
	const int bound = curlwp_bind();

	int s = pserialize_read_enter();
	IFNET_READER_FOREACH(ifp) {
		struct psref psref;
		/* Hold ifp across the window outside the read section. */
		psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
		pserialize_read_exit(s);
		if_attachdomain1(ifp);
		/* Re-enter before releasing so the list walk can continue. */
		s = pserialize_read_enter();
		psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
	}
	pserialize_read_exit(s);
	curlwp_bindx(bound);
}
1178
1179 static void
if_attachdomain1(struct ifnet * ifp)1180 if_attachdomain1(struct ifnet *ifp)
1181 {
1182 struct domain *dp;
1183 const int s = splsoftnet();
1184
1185 /* address family dependent data region */
1186 memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
1187 DOMAIN_FOREACH(dp) {
1188 if (dp->dom_ifattach != NULL)
1189 ifp->if_afdata[dp->dom_family] =
1190 (*dp->dom_ifattach)(ifp);
1191 }
1192
1193 splx(s);
1194 }
1195
1196 /*
1197 * Deactivate an interface. This points all of the procedure
1198 * handles at error stubs. May be called from interrupt context.
1199 */
1200 void
if_deactivate(struct ifnet * ifp)1201 if_deactivate(struct ifnet *ifp)
1202 {
1203 const int s = splsoftnet();
1204
1205 ifp->if_output = if_nulloutput;
1206 ifp->_if_input = if_nullinput;
1207 ifp->if_start = if_nullstart;
1208 ifp->if_transmit = if_nulltransmit;
1209 ifp->if_ioctl = if_nullioctl;
1210 ifp->if_init = if_nullinit;
1211 ifp->if_stop = if_nullstop;
1212 if (ifp->if_slowtimo)
1213 ifp->if_slowtimo = if_nullslowtimo;
1214 ifp->if_drain = if_nulldrain;
1215
1216 /* No more packets may be enqueued. */
1217 ifp->if_snd.ifq_maxlen = 0;
1218
1219 splx(s);
1220 }
1221
/*
 * Return true if the interface has been deactivated, recognized by
 * its if_output handler being if_nulloutput (the stub installed by
 * if_deactivate()).
 */
bool
if_is_deactivated(const struct ifnet *ifp)
{

	return ifp->if_output == if_nulloutput;
}
1228
1229 void
if_purgeaddrs(struct ifnet * ifp,int family,void (* purgeaddr)(struct ifaddr *))1230 if_purgeaddrs(struct ifnet *ifp, int family,
1231 void (*purgeaddr)(struct ifaddr *))
1232 {
1233 struct ifaddr *ifa, *nifa;
1234 int s;
1235
1236 s = pserialize_read_enter();
1237 for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) {
1238 nifa = IFADDR_READER_NEXT(ifa);
1239 if (ifa->ifa_addr->sa_family != family)
1240 continue;
1241 pserialize_read_exit(s);
1242
1243 (*purgeaddr)(ifa);
1244
1245 s = pserialize_read_enter();
1246 }
1247 pserialize_read_exit(s);
1248 }
1249
#ifdef IFAREF_DEBUG
/* Snapshot of an interface's addresses, each held with an extra reference. */
static struct ifaddr **ifa_list;
static int ifa_list_size;

/*
 * Record every address currently on ifp in ifa_list and take a
 * reference on each, so that if_check_and_free_ifa_list() can later
 * report which ones are still referenced.
 *
 * Uses file-scope state without locking, so it depends on only one
 * caller running at once.
 */
static void
if_build_ifa_list(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(ifa_list == NULL);
	KASSERT(ifa_list_size == 0);

	IFADDR_READER_FOREACH(ifa, ifp)
		ifa_list_size++;

	/*
	 * Nothing to record: leave ifa_list NULL (which the check routine
	 * treats as "no list") instead of requesting a zero-byte allocation.
	 */
	if (ifa_list_size == 0)
		return;

	/* The array stores pointers, so size it by its element type. */
	ifa_list = kmem_alloc(sizeof(*ifa_list) * ifa_list_size, KM_SLEEP);
	i = 0;
	IFADDR_READER_FOREACH(ifa, ifp) {
		ifa_list[i++] = ifa;
		ifaref(ifa);
	}
}

/*
 * Log, for every address recorded by if_build_ifa_list(), whether it
 * is still referenced by someone else (the count printed excludes the
 * reference held by the list itself), then drop the list's references
 * and free the list.  Does nothing if no list was built.
 */
static void
if_check_and_free_ifa_list(struct ifnet *ifp)
{
	int i;
	struct ifaddr *ifa;

	if (ifa_list == NULL)
		return;

	for (i = 0; i < ifa_list_size; i++) {
		char buf[64];

		ifa = ifa_list[i];
		sockaddr_format(ifa->ifa_addr, buf, sizeof(buf));
		if (ifa->ifa_refcnt > 1) {
			log(LOG_WARNING,
			    "ifa(%s) still referenced (refcnt=%d)\n",
			    buf, ifa->ifa_refcnt - 1);
		} else {
			log(LOG_DEBUG,
			    "ifa(%s) not referenced (refcnt=%d)\n",
			    buf, ifa->ifa_refcnt - 1);
		}
		ifafree(ifa);
	}

	/* Must match the size used for the allocation above. */
	kmem_free(ifa_list, sizeof(*ifa_list) * ifa_list_size);
	ifa_list = NULL;
	ifa_list_size = 0;
}
#endif
1305
/*
 * Detach an interface from the list of "active" interfaces,
 * freeing any resources as we go along.
 *
 * The teardown order below is: sysctl nodes, link-state work,
 * deactivation, removal from the global lists, slow timer, deferred
 * start, ALTQ/CARP, protocol input queues, addresses, routes,
 * per-domain data, psref target, pfil hooks, and finally the
 * interface's own locks and statistics.
 *
 * NOTE: This routine must be called with a valid thread context,
 * as it may block.
 */
void
if_detach(struct ifnet *ifp)
{
	struct socket so;
	struct ifaddr *ifa;
#ifdef IFAREF_DEBUG
	struct ifaddr *last_ifa = NULL;
#endif
	struct domain *dp;
	const struct protosw *pr;
	int i, family, purged;

#ifdef IFAREF_DEBUG
	if_build_ifa_list(ifp);
#endif
	/*
	 * XXX It's kind of lame that we have to have the
	 * XXX socket structure...
	 */
	memset(&so, 0, sizeof(so));

	const int s = splnet();

	sysctl_teardown(&ifp->if_sysctl_log);

	IFNET_LOCK(ifp);

	/*
	 * Unset all queued link states and pretend a
	 * link state change is scheduled.
	 * This stops any more link state changes occurring for this
	 * interface while it's being detached so it's safe
	 * to drain the workqueue.
	 */
	IF_LINK_STATE_CHANGE_LOCK(ifp);
	ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
	ifp->if_link_scheduled = true;
	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
	workqueue_wait(ifnet_link_state_wq, &ifp->if_link_work);

	if_deactivate(ifp);
	IFNET_UNLOCK(ifp);

	/*
	 * Unlink from the list and wait for all readers to leave
	 * from pserialize read sections.  Note that we can't do
	 * psref_target_destroy here.  See below.
	 */
	IFNET_GLOBAL_LOCK();
	ifindex2ifnet[ifp->if_index] = NULL;
	TAILQ_REMOVE(&ifnet_list, ifp, if_list);
	IFNET_WRITER_REMOVE(ifp);
	pserialize_perform(ifnet_psz);
	IFNET_GLOBAL_UNLOCK();

	if (ifp->if_slowtimo != NULL) {
		struct if_slowtimo_data *isd = ifp->if_slowtimo_data;

		/* Mark the timer as dying before halting callout and work. */
		mutex_enter(&isd->isd_lock);
		isd->isd_dying = true;
		mutex_exit(&isd->isd_lock);
		callout_halt(&isd->isd_ch, NULL);
		workqueue_wait(if_slowtimo_wq, &isd->isd_work);
		callout_destroy(&isd->isd_ch);
		mutex_destroy(&isd->isd_lock);
		kmem_free(isd, sizeof(*isd));

		ifp->if_slowtimo_data = NULL; /* paranoia */
		ifp->if_slowtimo = NULL; /* paranoia */
	}
	if_deferred_start_destroy(ifp);

	/*
	 * Do an if_down() to give protocols a chance to do something.
	 */
	if_down_deactivated(ifp);

#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

#if NCARP > 0
	/* Remove the interface from any carp group it is a part of. */
	if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP)
		carp_ifdetach(ifp);
#endif

	/*
	 * Ensure that all packets on protocol input pktqueues have been
	 * processed, or, at least, removed from the queues.
	 *
	 * A cross-call will ensure that the interrupts have completed.
	 * FIXME: not quite..
	 */
	pktq_ifdetach();
	xc_barrier(0);

	/*
	 * Rip all the addresses off the interface.  This should make
	 * all of the routes go away.
	 *
	 * pr_usrreq calls can remove an arbitrary number of ifaddrs
	 * from the list, including our "cursor", ifa.  For safety,
	 * and to honor the TAILQ abstraction, I just restart the
	 * loop after each removal.  Note that the loop will exit
	 * when all of the remaining ifaddrs belong to the AF_LINK
	 * family.  I am counting on the historical fact that at
	 * least one pr_usrreq in each address domain removes at
	 * least one ifaddr.
	 */
again:
	/*
	 * At this point, no other one tries to remove ifa in the list,
	 * so we don't need to take a lock or psref.  Avoid using
	 * IFADDR_READER_FOREACH to pass over an inspection of contract
	 * violations of pserialize.
	 */
	IFADDR_WRITER_FOREACH(ifa, ifp) {
		family = ifa->ifa_addr->sa_family;
#ifdef IFAREF_DEBUG
		printf("if_detach: ifaddr %p, family %d, refcnt %d\n",
		    ifa, family, ifa->ifa_refcnt);
		if (last_ifa != NULL && ifa == last_ifa)
			panic("if_detach: loop detected");
		last_ifa = ifa;
#endif
		/* Link-level addresses are handled after this loop. */
		if (family == AF_LINK)
			continue;
		dp = pffinddomain(family);
		KASSERTMSG(dp != NULL, "no domain for AF %d", family);
		/*
		 * XXX These PURGEIF calls are redundant with the
		 * purge-all-families calls below, but are left in for
		 * now both to make a smaller change, and to avoid
		 * unplanned interactions with clearing of
		 * ifp->if_addrlist.
		 */
		purged = 0;
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++) {
			so.so_proto = pr;
			if (pr->pr_usrreqs) {
				(void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
				purged = 1;
			}
		}
		if (purged == 0) {
			/*
			 * XXX What's really the best thing to do
			 * XXX here?  --thorpej@NetBSD.org
			 */
			printf("if_detach: WARNING: AF %d not purged\n",
			    family);
			ifa_remove(ifp, ifa);
		}
		goto again;
	}

	if_free_sadl(ifp, 1);

restart:
	/* Only AF_LINK addresses may remain; remove them one at a time. */
	IFADDR_WRITER_FOREACH(ifa, ifp) {
		family = ifa->ifa_addr->sa_family;
		KASSERT(family == AF_LINK);
		ifa_remove(ifp, ifa);
		goto restart;
	}

	/* Delete stray routes from the routing table. */
	for (i = 0; i <= AF_MAX; i++)
		rt_delete_matched_entries(i, if_delroute_matcher, ifp, false);

	DOMAIN_FOREACH(dp) {
		if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family])
		{
			void *p = ifp->if_afdata[dp->dom_family];
			if (p) {
				/* Clear the slot before handing it to the hook. */
				ifp->if_afdata[dp->dom_family] = NULL;
				(*dp->dom_ifdetach)(ifp, p);
			}
		}

		/*
		 * One would expect multicast memberships (INET and
		 * INET6) on UDP sockets to be purged by the PURGEIF
		 * calls above, but if all addresses were removed from
		 * the interface prior to destruction, the calls will
		 * not be made (e.g. ppp, for which pppd(8) generally
		 * removes addresses before destroying the interface).
		 * Because there is no invariant that multicast
		 * memberships only exist for interfaces with IPv4
		 * addresses, we must call PURGEIF regardless of
		 * addresses.  (Protocols which might store ifnet
		 * pointers are marked with PR_PURGEIF.)
		 */
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
		{
			so.so_proto = pr;
			if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF)
				(void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
		}
	}

	/*
	 * Must be done after the above pr_purgeif because if_psref may be
	 * still used in pr_purgeif.
	 */
	psref_target_destroy(&ifp->if_psref, ifnet_psref_class);
	PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry);

	pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp);
	(void)pfil_head_destroy(ifp->if_pfil);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

	IF_AFDATA_LOCK_DESTROY(ifp);

	if (ifp->if_percpuq != NULL) {
		if_percpuq_destroy(ifp->if_percpuq);
		ifp->if_percpuq = NULL;
	}

	mutex_obj_free(ifp->if_ioctl_lock);
	ifp->if_ioctl_lock = NULL;
	mutex_obj_free(ifp->if_snd.ifq_lock);
	if_stats_fini(ifp);
	KASSERT(!simplehook_has_hooks(ifp->if_linkstate_hooks));
	simplehook_destroy(ifp->if_linkstate_hooks);

	splx(s);

#ifdef IFAREF_DEBUG
	if_check_and_free_ifa_list(ifp);
#endif
}
1552
1553 /*
1554 * Callback for a radix tree walk to delete all references to an
1555 * ifnet.
1556 */
1557 static int
if_delroute_matcher(struct rtentry * rt,void * v)1558 if_delroute_matcher(struct rtentry *rt, void *v)
1559 {
1560 struct ifnet *ifp = (struct ifnet *)v;
1561
1562 if (rt->rt_ifp == ifp)
1563 return 1;
1564 else
1565 return 0;
1566 }
1567
1568 /*
1569 * Create a clone network interface.
1570 */
1571 static int
if_clone_create(const char * name)1572 if_clone_create(const char *name)
1573 {
1574 struct if_clone *ifc;
1575 struct ifnet *ifp;
1576 struct psref psref;
1577 int unit;
1578
1579 KASSERT(mutex_owned(&if_clone_mtx));
1580
1581 ifc = if_clone_lookup(name, &unit);
1582 if (ifc == NULL)
1583 return EINVAL;
1584
1585 ifp = if_get(name, &psref);
1586 if (ifp != NULL) {
1587 if_put(ifp, &psref);
1588 return EEXIST;
1589 }
1590
1591 return (*ifc->ifc_create)(ifc, unit);
1592 }
1593
1594 /*
1595 * Destroy a clone network interface.
1596 */
1597 static int
if_clone_destroy(const char * name)1598 if_clone_destroy(const char *name)
1599 {
1600 struct if_clone *ifc;
1601 struct ifnet *ifp;
1602 struct psref psref;
1603 int error;
1604 int (*if_ioctlfn)(struct ifnet *, u_long, void *);
1605
1606 KASSERT(mutex_owned(&if_clone_mtx));
1607
1608 ifc = if_clone_lookup(name, NULL);
1609 if (ifc == NULL)
1610 return EINVAL;
1611
1612 if (ifc->ifc_destroy == NULL)
1613 return EOPNOTSUPP;
1614
1615 ifp = if_get(name, &psref);
1616 if (ifp == NULL)
1617 return ENXIO;
1618
1619 /* We have to disable ioctls here */
1620 IFNET_LOCK(ifp);
1621 if_ioctlfn = ifp->if_ioctl;
1622 ifp->if_ioctl = if_nullioctl;
1623 IFNET_UNLOCK(ifp);
1624
1625 /*
1626 * We cannot call ifc_destroy with holding ifp.
1627 * Releasing ifp here is safe thanks to if_clone_mtx.
1628 */
1629 if_put(ifp, &psref);
1630
1631 error = (*ifc->ifc_destroy)(ifp);
1632
1633 if (error != 0) {
1634 /* We have to restore if_ioctl on error */
1635 IFNET_LOCK(ifp);
1636 ifp->if_ioctl = if_ioctlfn;
1637 IFNET_UNLOCK(ifp);
1638 }
1639
1640 return error;
1641 }
1642
/*
 * Return true when every character of name is an ASCII decimal digit.
 * An empty string yields true.
 */
static bool
if_is_unit(const char *name)
{
	const char *p;

	for (p = name; *p != '\0'; p++) {
		if (!(*p >= '0' && *p <= '9'))
			return false;
	}

	return true;
}
1655
/*
 * Look up a network interface cloner.
 *
 * name is split into a driver name and a trailing all-digit unit
 * number.  The cloner list is searched for the driver name; if it is
 * not found, one attempt is made to autoload the module "if_<name>"
 * and the search is repeated.
 *
 * Must be called with if_clone_mtx held (asserted); the mutex is
 * dropped and re-taken around the module autoload.
 *
 * Returns the cloner and, when unitp is not NULL, stores the parsed
 * unit number through it.  Returns NULL when name has no driver part
 * or no unit part, when no cloner can be found, or when the unit
 * number is malformed or too large.
 */
static struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	char *dp, ifname[IFNAMSIZ + 3];
	int unit;

	KASSERT(mutex_owned(&if_clone_mtx));

	/* ifname holds the module name: "if_" followed by the driver name. */
	strcpy(ifname, "if_");
	/* separate interface name from unit */
	/* TODO: search unit number from backward */
	for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ &&
	    *cp && !if_is_unit(cp);)
		*dp++ = *cp++;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return NULL;	/* No name or unit number */
	*dp++ = '\0';

again:
	/* The driver name proper starts after the "if_" prefix. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strcmp(ifname + 3, ifc->ifc_name) == 0)
			break;
	}

	if (ifc == NULL) {
		int error;
		/* An emptied first byte marks that autoload was already tried. */
		if (*ifname == '\0')
			return NULL;
		mutex_exit(&if_clone_mtx);
		error = module_autoload(ifname, MODULE_CLASS_DRIVER);
		mutex_enter(&if_clone_mtx);
		if (error)
			return NULL;
		*ifname = '\0';
		goto again;
	}

	/* Parse the decimal unit number, rejecting values that could overflow. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' || unit >= INT_MAX / 10) {
			/* Bogus unit number. */
			return NULL;
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return ifc;
}
1712
/*
 * Register a network interface cloner: insert it at the head of
 * if_cloners and bump if_cloners_count, under if_clone_mtx.
 */
void
if_clone_attach(struct if_clone *ifc)
{

	mutex_enter(&if_clone_mtx);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
	mutex_exit(&if_clone_mtx);
}
1725
/*
 * Unregister a network interface cloner: remove it from its list and
 * decrement if_cloners_count, under if_clone_mtx.
 */
void
if_clone_detach(struct if_clone *ifc)
{

	mutex_enter(&if_clone_mtx);
	LIST_REMOVE(ifc, ifc_list);
	if_cloners_count--;
	mutex_exit(&if_clone_mtx);
}
1738
1739 /*
1740 * Provide list of interface cloners to userspace.
1741 */
1742 int
if_clone_list(int buf_count,char * buffer,int * total)1743 if_clone_list(int buf_count, char *buffer, int *total)
1744 {
1745 char outbuf[IFNAMSIZ], *dst;
1746 struct if_clone *ifc;
1747 int count, error = 0;
1748
1749 mutex_enter(&if_clone_mtx);
1750 *total = if_cloners_count;
1751 if ((dst = buffer) == NULL) {
1752 /* Just asking how many there are. */
1753 goto out;
1754 }
1755
1756 if (buf_count < 0) {
1757 error = EINVAL;
1758 goto out;
1759 }
1760
1761 count = (if_cloners_count < buf_count) ? if_cloners_count : buf_count;
1762
1763 for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
1764 ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
1765 (void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf));
1766 if (outbuf[sizeof(outbuf) - 1] != '\0') {
1767 error = ENAMETOOLONG;
1768 goto out;
1769 }
1770 error = copyout(outbuf, dst, sizeof(outbuf));
1771 if (error != 0)
1772 break;
1773 }
1774
1775 out:
1776 mutex_exit(&if_clone_mtx);
1777 return error;
1778 }
1779
/*
 * Initialize the passive-reference target embedded in an interface
 * address, using ifa_psref_class.
 */
void
ifa_psref_init(struct ifaddr *ifa)
{

	psref_target_init(&ifa->ifa_psref, ifa_psref_class);
}
1786
/*
 * Take a reference on an interface address by atomically incrementing
 * ifa_refcnt.  Dropped with ifafree().
 */
void
ifaref(struct ifaddr *ifa)
{

	atomic_inc_uint(&ifa->ifa_refcnt);
}
1793
/*
 * Drop a reference on an interface address and free it (M_IFADDR)
 * when the count reaches zero.
 *
 * ifa must not be NULL and must hold at least one reference
 * (both asserted).
 */
void
ifafree(struct ifaddr *ifa)
{
	KASSERT(ifa != NULL);
	KASSERTMSG(ifa->ifa_refcnt > 0, "ifa_refcnt=%d", ifa->ifa_refcnt);

	/* Release barrier before the decrement ... */
	membar_release();
	if (atomic_dec_uint_nv(&ifa->ifa_refcnt) != 0)
		return;
	/* ... and acquire barrier before the last holder frees the object. */
	membar_acquire();
	free(ifa, M_IFADDR);
}
1806
/*
 * Return true if the IFA_DESTROYING flag is set in ifa_flags.
 */
bool
ifa_is_destroying(struct ifaddr *ifa)
{

	return ISSET(ifa->ifa_flags, IFA_DESTROYING);
}
1813
/*
 * Attach an address to an interface: record the owning interface in
 * the ifaddr, append it to both the TAILQ address list and the
 * pserialize-safe address list, and take a reference for the list.
 */
void
ifa_insert(struct ifnet *ifp, struct ifaddr *ifa)
{

	ifa->ifa_ifp = ifp;

	/*
	 * Check MP-safety for IFEF_MPSAFE drivers.
	 * Check !IFF_RUNNING for initialization routines that normally don't
	 * take IFNET_LOCK but it's safe because there is no competitor.
	 * XXX there are false positive cases because IFF_RUNNING can be off on
	 * if_stop.
	 */
	KASSERT(!if_is_mpsafe(ifp) || !ISSET(ifp->if_flags, IFF_RUNNING) ||
	    IFNET_LOCKED(ifp));

	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
	IFADDR_ENTRY_INIT(ifa);
	IFADDR_WRITER_INSERT_TAIL(ifp, ifa);

	/* Reference held on behalf of the interface's address list. */
	ifaref(ifa);
}
1836
/*
 * Detach an address from its interface: unlink it from both address
 * lists and drop a reference with ifafree().  Under NET_MPSAFE it
 * additionally performs a pserialize pass and destroys the psref target
 * before dropping the reference.
 *
 * ifa must belong to ifp (asserted).
 */
void
ifa_remove(struct ifnet *ifp, struct ifaddr *ifa)
{

	KASSERT(ifa->ifa_ifp == ifp);
	/*
	 * Check MP-safety for IFEF_MPSAFE drivers.
	 * if_is_deactivated indicates ifa_remove is called from if_detach
	 * where it is safe even if IFNET_LOCK isn't held.
	 */
	KASSERT(!if_is_mpsafe(ifp) || if_is_deactivated(ifp) ||
	    IFNET_LOCKED(ifp));

	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
	IFADDR_WRITER_REMOVE(ifa);
#ifdef NET_MPSAFE
	/* pserialize pass after unlinking, under the global ifnet lock. */
	IFNET_GLOBAL_LOCK();
	pserialize_perform(ifnet_psz);
	IFNET_GLOBAL_UNLOCK();
#endif

#ifdef NET_MPSAFE
	psref_target_destroy(&ifa->ifa_psref, ifa_psref_class);
#endif
	IFADDR_ENTRY_DESTROY(ifa);
	ifafree(ifa);
}
1864
/*
 * Acquire a passive reference to an interface address, recorded in
 * the caller-supplied psref.  Released with ifa_release().
 */
void
ifa_acquire(struct ifaddr *ifa, struct psref *psref)
{

	PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
	psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class);
}
1872
1873 void
ifa_release(struct ifaddr * ifa,struct psref * psref)1874 ifa_release(struct ifaddr *ifa, struct psref *psref)
1875 {
1876
1877 if (ifa == NULL)
1878 return;
1879
1880 psref_release(psref, &ifa->ifa_psref, ifa_psref_class);
1881 }
1882
/*
 * Return the result of psref_held() for the address's psref target.
 */
bool
ifa_held(struct ifaddr *ifa)
{

	return psref_held(&ifa->ifa_psref, ifa_psref_class);
}
1889
/*
 * Return non-zero when sockaddr_cmp() reports the two socket addresses
 * as equal (result 0), zero otherwise.
 */
static inline int
equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
{

	return sockaddr_cmp(sa1, sa2) == 0;
}
1896
1897 /*
1898 * Locate an interface based on a complete address.
1899 */
1900 /*ARGSUSED*/
1901 struct ifaddr *
ifa_ifwithaddr(const struct sockaddr * addr)1902 ifa_ifwithaddr(const struct sockaddr *addr)
1903 {
1904 struct ifnet *ifp;
1905 struct ifaddr *ifa;
1906
1907 IFNET_READER_FOREACH(ifp) {
1908 if (if_is_deactivated(ifp))
1909 continue;
1910 IFADDR_READER_FOREACH(ifa, ifp) {
1911 if (ifa->ifa_addr->sa_family != addr->sa_family)
1912 continue;
1913 if (equal(addr, ifa->ifa_addr))
1914 return ifa;
1915 if ((ifp->if_flags & IFF_BROADCAST) &&
1916 ifa->ifa_broadaddr &&
1917 /* IP6 doesn't have broadcast */
1918 ifa->ifa_broadaddr->sa_len != 0 &&
1919 equal(ifa->ifa_broadaddr, addr))
1920 return ifa;
1921 }
1922 }
1923 return NULL;
1924 }
1925
1926 struct ifaddr *
ifa_ifwithaddr_psref(const struct sockaddr * addr,struct psref * psref)1927 ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref)
1928 {
1929 struct ifaddr *ifa;
1930 int s = pserialize_read_enter();
1931
1932 ifa = ifa_ifwithaddr(addr);
1933 if (ifa != NULL)
1934 ifa_acquire(ifa, psref);
1935 pserialize_read_exit(s);
1936
1937 return ifa;
1938 }
1939
1940 /*
1941 * Locate the point to point interface with a given destination address.
1942 */
1943 /*ARGSUSED*/
1944 struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr * addr)1945 ifa_ifwithdstaddr(const struct sockaddr *addr)
1946 {
1947 struct ifnet *ifp;
1948 struct ifaddr *ifa;
1949
1950 IFNET_READER_FOREACH(ifp) {
1951 if (if_is_deactivated(ifp))
1952 continue;
1953 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1954 continue;
1955 IFADDR_READER_FOREACH(ifa, ifp) {
1956 if (ifa->ifa_addr->sa_family != addr->sa_family ||
1957 ifa->ifa_dstaddr == NULL)
1958 continue;
1959 if (equal(addr, ifa->ifa_dstaddr))
1960 return ifa;
1961 }
1962 }
1963
1964 return NULL;
1965 }
1966
1967 struct ifaddr *
ifa_ifwithdstaddr_psref(const struct sockaddr * addr,struct psref * psref)1968 ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref)
1969 {
1970 struct ifaddr *ifa;
1971 int s;
1972
1973 s = pserialize_read_enter();
1974 ifa = ifa_ifwithdstaddr(addr);
1975 if (ifa != NULL)
1976 ifa_acquire(ifa, psref);
1977 pserialize_read_exit(s);
1978
1979 return ifa;
1980 }
1981
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * For AF_LINK, an sdl_index naming an existing, active interface
 * short-circuits to that interface's link-level address.  With
 * NETATALK, AF_APPLETALK is resolved through at_ifawithnet().
 * Otherwise every address of the same family that has a netmask is
 * compared against addr under that netmask, and among the matches the
 * one whose netmask rt_refines() the current candidate's wins.
 *
 * Returns the chosen address or NULL.  No reference is taken.
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa, *ifa_maybe = NULL;
	const struct sockaddr_dl *sdl;
	u_int af = addr->sa_family;
	const char *addr_data = addr->sa_data, *cplim;

	if (af == AF_LINK) {
		sdl = satocsdl(addr);
		if (sdl->sdl_index && sdl->sdl_index < if_indexlim &&
		    ifindex2ifnet[sdl->sdl_index] &&
		    !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) {
			return ifindex2ifnet[sdl->sdl_index]->if_dl;
		}
	}
#ifdef NETATALK
	if (af == AF_APPLETALK) {
		const struct sockaddr_at *sat, *sat2;
		sat = (const struct sockaddr_at *)addr;
		IFNET_READER_FOREACH(ifp) {
			if (if_is_deactivated(ifp))
				continue;
			ifa = at_ifawithnet((const struct sockaddr_at *)addr,
			    ifp);
			if (ifa == NULL)
				continue;
			sat2 = (struct sockaddr_at *)ifa->ifa_addr;
			if (sat2->sat_addr.s_net == sat->sat_addr.s_net)
				return ifa; /* exact match */
			if (ifa_maybe == NULL) {
				/* else keep the if with the right range */
				ifa_maybe = ifa;
			}
		}
		return ifa_maybe;
	}
#endif
	IFNET_READER_FOREACH(ifp) {
		if (if_is_deactivated(ifp))
			continue;
		IFADDR_READER_FOREACH(ifa, ifp) {
			const char *cp, *cp2, *cp3;

			/*
			 * The "next" label lets the mask loop below skip to
			 * the following address via this continue.
			 */
			if (ifa->ifa_addr->sa_family != af ||
			    ifa->ifa_netmask == NULL)
 next:				continue;
			cp = addr_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = (const char *)ifa->ifa_netmask +
			    ifa->ifa_netmask->sa_len;
			/* Compare byte by byte, only where the mask has bits set. */
			while (cp3 < cplim) {
				if ((*cp++ ^ *cp2++) & *cp3++) {
					/* want to continue for() loop */
					goto next;
				}
			}
			if (ifa_maybe == NULL ||
			    rt_refines(ifa->ifa_netmask,
			               ifa_maybe->ifa_netmask))
				ifa_maybe = ifa;
		}
	}
	return ifa_maybe;
}
2053
2054 struct ifaddr *
ifa_ifwithnet_psref(const struct sockaddr * addr,struct psref * psref)2055 ifa_ifwithnet_psref(const struct sockaddr *addr, struct psref *psref)
2056 {
2057 struct ifaddr *ifa;
2058 int s;
2059
2060 s = pserialize_read_enter();
2061 ifa = ifa_ifwithnet(addr);
2062 if (ifa != NULL)
2063 ifa_acquire(ifa, psref);
2064 pserialize_read_exit(s);
2065
2066 return ifa;
2067 }
2068
2069 /*
2070 * Find the interface of the address.
2071 */
2072 struct ifaddr *
ifa_ifwithladdr(const struct sockaddr * addr)2073 ifa_ifwithladdr(const struct sockaddr *addr)
2074 {
2075 struct ifaddr *ia;
2076
2077 if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) ||
2078 (ia = ifa_ifwithnet(addr)))
2079 return ia;
2080 return NULL;
2081 }
2082
2083 struct ifaddr *
ifa_ifwithladdr_psref(const struct sockaddr * addr,struct psref * psref)2084 ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref)
2085 {
2086 struct ifaddr *ifa;
2087 int s;
2088
2089 s = pserialize_read_enter();
2090 ifa = ifa_ifwithladdr(addr);
2091 if (ifa != NULL)
2092 ifa_acquire(ifa, psref);
2093 pserialize_read_exit(s);
2094
2095 return ifa;
2096 }
2097
2098 /*
2099 * Find an interface using a specific address family
2100 */
2101 struct ifaddr *
ifa_ifwithaf(int af)2102 ifa_ifwithaf(int af)
2103 {
2104 struct ifnet *ifp;
2105 struct ifaddr *ifa = NULL;
2106 int s;
2107
2108 s = pserialize_read_enter();
2109 IFNET_READER_FOREACH(ifp) {
2110 if (if_is_deactivated(ifp))
2111 continue;
2112 IFADDR_READER_FOREACH(ifa, ifp) {
2113 if (ifa->ifa_addr->sa_family == af)
2114 goto out;
2115 }
2116 }
2117 out:
2118 pserialize_read_exit(s);
2119 return ifa;
2120 }
2121
2122 /*
2123 * Find an interface address specific to an interface best matching
2124 * a given address.
2125 */
2126 struct ifaddr *
ifaof_ifpforaddr(const struct sockaddr * addr,struct ifnet * ifp)2127 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2128 {
2129 struct ifaddr *ifa;
2130 const char *cp, *cp2, *cp3;
2131 const char *cplim;
2132 struct ifaddr *ifa_maybe = 0;
2133 u_int af = addr->sa_family;
2134
2135 if (if_is_deactivated(ifp))
2136 return NULL;
2137
2138 if (af >= AF_MAX)
2139 return NULL;
2140
2141 IFADDR_READER_FOREACH(ifa, ifp) {
2142 if (ifa->ifa_addr->sa_family != af)
2143 continue;
2144 ifa_maybe = ifa;
2145 if (ifa->ifa_netmask == NULL) {
2146 if (equal(addr, ifa->ifa_addr) ||
2147 (ifa->ifa_dstaddr &&
2148 equal(addr, ifa->ifa_dstaddr)))
2149 return ifa;
2150 continue;
2151 }
2152 cp = addr->sa_data;
2153 cp2 = ifa->ifa_addr->sa_data;
2154 cp3 = ifa->ifa_netmask->sa_data;
2155 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2156 for (; cp3 < cplim; cp3++) {
2157 if ((*cp++ ^ *cp2++) & *cp3)
2158 break;
2159 }
2160 if (cp3 == cplim)
2161 return ifa;
2162 }
2163 return ifa_maybe;
2164 }
2165
2166 struct ifaddr *
ifaof_ifpforaddr_psref(const struct sockaddr * addr,struct ifnet * ifp,struct psref * psref)2167 ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp,
2168 struct psref *psref)
2169 {
2170 struct ifaddr *ifa;
2171 int s;
2172
2173 s = pserialize_read_enter();
2174 ifa = ifaof_ifpforaddr(addr, ifp);
2175 if (ifa != NULL)
2176 ifa_acquire(ifa, psref);
2177 pserialize_read_exit(s);
2178
2179 return ifa;
2180 }
2181
2182 /*
2183 * Default action when installing a route with a Link Level gateway.
2184 * Lookup an appropriate real ifa to point to.
2185 * This should be moved to /sys/net/link.c eventually.
2186 */
2187 void
link_rtrequest(int cmd,struct rtentry * rt,const struct rt_addrinfo * info)2188 link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info)
2189 {
2190 struct ifaddr *ifa;
2191 const struct sockaddr *dst;
2192 struct ifnet *ifp;
2193 struct psref psref;
2194
2195 if (cmd != RTM_ADD || ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
2196 return;
2197 ifp = rt->rt_ifa->ifa_ifp;
2198 dst = rt_getkey(rt);
2199 if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) {
2200 rt_replace_ifa(rt, ifa);
2201 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2202 ifa->ifa_rtrequest(cmd, rt, info);
2203 ifa_release(ifa, &psref);
2204 }
2205 }
2206
/*
 * bitmask macros to manage a densely packed link_state change queue.
 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and
 * LINK_STATE_UP(2) we need 2 bits for each state change.
 * As a state change to store is 0, treat all bits set as an unset item.
 *
 * In all macros q is the queue word (an integer lvalue), i is a slot
 * index counted from the least significant end, and v is a state value.
 *
 *  LQ_MASK(i)          bit mask covering slot i
 *  LQ_ITEM(q, i)       value stored in slot i
 *  LQ_STORE(q, i, v)   overwrite slot i with v
 *  LQ_MAX(q)           number of slots that fit in q
 *  LQ_POP(q, v)        v = slot 0; shift the queue down and mark the
 *                      top slot unset
 *  LQ_PUSH(q, v)       shift the queue down (dropping slot 0) and put v
 *                      in the top slot
 *  LQ_FIND_UNSET(q, i) set i to the first unset slot, or to LQ_MAX(q)
 *                      when every slot is in use
 */
#define LQ_ITEM_BITS		2
#define LQ_ITEM_MASK		((1 << LQ_ITEM_BITS) - 1)
#define LQ_MASK(i)		(LQ_ITEM_MASK << (i) * LQ_ITEM_BITS)
#define LINK_STATE_UNSET	LQ_ITEM_MASK
#define LQ_ITEM(q, i)		(((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS)
#define LQ_STORE(q, i, v)						      \
	do {								      \
		(q) &= ~LQ_MASK((i));					      \
		(q) |= (v) << (i) * LQ_ITEM_BITS;			      \
	} while (0 /* CONSTCOND */)
#define LQ_MAX(q)		((sizeof((q)) * NBBY) / LQ_ITEM_BITS)
#define LQ_POP(q, v)							      \
	do {								      \
		(v) = LQ_ITEM((q), 0);					      \
		(q) >>= LQ_ITEM_BITS;					      \
		(q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;  \
	} while (0 /* CONSTCOND */)
#define LQ_PUSH(q, v)							      \
	do {								      \
		(q) >>= LQ_ITEM_BITS;					      \
		(q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;		      \
	} while (0 /* CONSTCOND */)
#define LQ_FIND_UNSET(q, i)						      \
	for ((i) = 0; (i) < LQ_MAX((q)); (i)++) {			      \
		if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET)		      \
			break;						      \
	}
2240
2241 /*
2242 * Handle a change in the interface link state and
2243 * queue notifications.
2244 */
2245 void
if_link_state_change(struct ifnet * ifp,int link_state)2246 if_link_state_change(struct ifnet *ifp, int link_state)
2247 {
2248 int idx;
2249
2250 /* Ensure change is to a valid state */
2251 switch (link_state) {
2252 case LINK_STATE_UNKNOWN: /* FALLTHROUGH */
2253 case LINK_STATE_DOWN: /* FALLTHROUGH */
2254 case LINK_STATE_UP:
2255 break;
2256 default:
2257 #ifdef DEBUG
2258 printf("%s: invalid link state %d\n",
2259 ifp->if_xname, link_state);
2260 #endif
2261 return;
2262 }
2263
2264 IF_LINK_STATE_CHANGE_LOCK(ifp);
2265
2266 /* Find the last unset event in the queue. */
2267 LQ_FIND_UNSET(ifp->if_link_queue, idx);
2268
2269 if (idx == 0) {
2270 /*
2271 * There is no queue of link state changes.
2272 * As we have the lock we can safely compare against the
2273 * current link state and return if the same.
2274 * Otherwise, if scheduled is true then the interface is being
2275 * detached and the queue is being drained so we need
2276 * to avoid queuing more work.
2277 */
2278 if (ifp->if_link_state == link_state ||
2279 ifp->if_link_scheduled)
2280 goto out;
2281 } else {
2282 /* Ensure link_state doesn't match the last queued state. */
2283 if (LQ_ITEM(ifp->if_link_queue, idx - 1)
2284 == (uint8_t)link_state)
2285 goto out;
2286 }
2287
2288 /* Handle queue overflow. */
2289 if (idx == LQ_MAX(ifp->if_link_queue)) {
2290 uint8_t lost;
2291
2292 /*
2293 * The DOWN state must be protected from being pushed off
2294 * the queue to ensure that userland will always be
2295 * in a sane state.
2296 * Because DOWN is protected, there is no need to protect
2297 * UNKNOWN.
2298 * It should be invalid to change from any other state to
2299 * UNKNOWN anyway ...
2300 */
2301 lost = LQ_ITEM(ifp->if_link_queue, 0);
2302 LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state);
2303 if (lost == LINK_STATE_DOWN) {
2304 lost = LQ_ITEM(ifp->if_link_queue, 0);
2305 LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN);
2306 }
2307 printf("%s: lost link state change %s\n",
2308 ifp->if_xname,
2309 lost == LINK_STATE_UP ? "UP" :
2310 lost == LINK_STATE_DOWN ? "DOWN" :
2311 "UNKNOWN");
2312 } else
2313 LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state);
2314
2315 if (ifp->if_link_scheduled)
2316 goto out;
2317
2318 ifp->if_link_scheduled = true;
2319 workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work, NULL);
2320
2321 out:
2322 IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2323 }
2324
2325 /*
2326 * Handle interface link state change notifications.
2327 */
2328 static void
if_link_state_change_process(struct ifnet * ifp,int link_state)2329 if_link_state_change_process(struct ifnet *ifp, int link_state)
2330 {
2331 struct domain *dp;
2332 const int s = splnet();
2333 bool notify;
2334
2335 KASSERT(!cpu_intr_p());
2336
2337 IF_LINK_STATE_CHANGE_LOCK(ifp);
2338
2339 /* Ensure the change is still valid. */
2340 if (ifp->if_link_state == link_state) {
2341 IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2342 splx(s);
2343 return;
2344 }
2345
2346 #ifdef DEBUG
2347 log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
2348 link_state == LINK_STATE_UP ? "UP" :
2349 link_state == LINK_STATE_DOWN ? "DOWN" :
2350 "UNKNOWN",
2351 ifp->if_link_state == LINK_STATE_UP ? "UP" :
2352 ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" :
2353 "UNKNOWN");
2354 #endif
2355
2356 /*
2357 * When going from UNKNOWN to UP, we need to mark existing
2358 * addresses as tentative and restart DAD as we may have
2359 * erroneously not found a duplicate.
2360 *
2361 * This needs to happen before rt_ifmsg to avoid a race where
2362 * listeners would have an address and expect it to work right
2363 * away.
2364 */
2365 notify = (link_state == LINK_STATE_UP &&
2366 ifp->if_link_state == LINK_STATE_UNKNOWN);
2367 ifp->if_link_state = link_state;
2368 /* The following routines may sleep so release the spin mutex */
2369 IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2370
2371 KERNEL_LOCK_UNLESS_NET_MPSAFE();
2372 if (notify) {
2373 DOMAIN_FOREACH(dp) {
2374 if (dp->dom_if_link_state_change != NULL)
2375 dp->dom_if_link_state_change(ifp,
2376 LINK_STATE_DOWN);
2377 }
2378 }
2379
2380 /* Notify that the link state has changed. */
2381 rt_ifmsg(ifp);
2382
2383 simplehook_dohooks(ifp->if_linkstate_hooks);
2384
2385 DOMAIN_FOREACH(dp) {
2386 if (dp->dom_if_link_state_change != NULL)
2387 dp->dom_if_link_state_change(ifp, link_state);
2388 }
2389 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2390 splx(s);
2391 }
2392
2393 /*
2394 * Process the interface link state change queue.
2395 */
2396 static void
if_link_state_change_work(struct work * work,void * arg)2397 if_link_state_change_work(struct work *work, void *arg)
2398 {
2399 struct ifnet *ifp = container_of(work, struct ifnet, if_link_work);
2400 uint8_t state;
2401
2402 KERNEL_LOCK_UNLESS_NET_MPSAFE();
2403 const int s = splnet();
2404
2405 /*
2406 * Pop a link state change from the queue and process it.
2407 * If there is nothing to process then if_detach() has been called.
2408 * We keep if_link_scheduled = true so the queue can safely drain
2409 * without more work being queued.
2410 */
2411 IF_LINK_STATE_CHANGE_LOCK(ifp);
2412 LQ_POP(ifp->if_link_queue, state);
2413 IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2414 if (state == LINK_STATE_UNSET)
2415 goto out;
2416
2417 if_link_state_change_process(ifp, state);
2418
2419 /* If there is a link state change to come, schedule it. */
2420 IF_LINK_STATE_CHANGE_LOCK(ifp);
2421 if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) {
2422 ifp->if_link_scheduled = true;
2423 workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work,
2424 NULL);
2425 } else
2426 ifp->if_link_scheduled = false;
2427 IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2428
2429 out:
2430 splx(s);
2431 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2432 }
2433
2434 void *
if_linkstate_change_establish(struct ifnet * ifp,void (* fn)(void *),void * arg)2435 if_linkstate_change_establish(struct ifnet *ifp, void (*fn)(void *), void *arg)
2436 {
2437 khook_t *hk;
2438
2439 hk = simplehook_establish(ifp->if_linkstate_hooks, fn, arg);
2440
2441 return (void *)hk;
2442 }
2443
2444 void
if_linkstate_change_disestablish(struct ifnet * ifp,void * vhook,kmutex_t * lock)2445 if_linkstate_change_disestablish(struct ifnet *ifp, void *vhook,
2446 kmutex_t *lock)
2447 {
2448
2449 simplehook_disestablish(ifp->if_linkstate_hooks, vhook, lock);
2450 }
2451
2452 /*
2453 * Used to mark addresses on an interface as DETATCHED or TENTATIVE
2454 * and thus start Duplicate Address Detection without changing the
2455 * real link state.
2456 */
2457 void
if_domain_link_state_change(struct ifnet * ifp,int link_state)2458 if_domain_link_state_change(struct ifnet *ifp, int link_state)
2459 {
2460 struct domain *dp;
2461
2462 const int s = splnet();
2463 KERNEL_LOCK_UNLESS_NET_MPSAFE();
2464
2465 DOMAIN_FOREACH(dp) {
2466 if (dp->dom_if_link_state_change != NULL)
2467 dp->dom_if_link_state_change(ifp, link_state);
2468 }
2469
2470 splx(s);
2471 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2472 }
2473
2474 /*
2475 * Default action when installing a local route on a point-to-point
2476 * interface.
2477 */
2478 void
p2p_rtrequest(int req,struct rtentry * rt,__unused const struct rt_addrinfo * info)2479 p2p_rtrequest(int req, struct rtentry *rt,
2480 __unused const struct rt_addrinfo *info)
2481 {
2482 struct ifnet *ifp = rt->rt_ifp;
2483 struct ifaddr *ifa, *lo0ifa;
2484 int s = pserialize_read_enter();
2485
2486 switch (req) {
2487 case RTM_ADD:
2488 if ((rt->rt_flags & RTF_LOCAL) == 0)
2489 break;
2490
2491 rt->rt_ifp = lo0ifp;
2492
2493 if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
2494 break;
2495
2496 IFADDR_READER_FOREACH(ifa, ifp) {
2497 if (equal(rt_getkey(rt), ifa->ifa_addr))
2498 break;
2499 }
2500 if (ifa == NULL)
2501 break;
2502
2503 /*
2504 * Ensure lo0 has an address of the same family.
2505 */
2506 IFADDR_READER_FOREACH(lo0ifa, lo0ifp) {
2507 if (lo0ifa->ifa_addr->sa_family ==
2508 ifa->ifa_addr->sa_family)
2509 break;
2510 }
2511 if (lo0ifa == NULL)
2512 break;
2513
2514 /*
2515 * Make sure to set rt->rt_ifa to the interface
2516 * address we are using, otherwise we will have trouble
2517 * with source address selection.
2518 */
2519 if (ifa != rt->rt_ifa)
2520 rt_replace_ifa(rt, ifa);
2521 break;
2522 case RTM_DELETE:
2523 default:
2524 break;
2525 }
2526 pserialize_read_exit(s);
2527 }
2528
2529 static void
_if_down(struct ifnet * ifp)2530 _if_down(struct ifnet *ifp)
2531 {
2532 struct ifaddr *ifa;
2533 struct domain *dp;
2534 struct psref psref;
2535
2536 ifp->if_flags &= ~IFF_UP;
2537 nanotime(&ifp->if_lastchange);
2538
2539 const int bound = curlwp_bind();
2540 int s = pserialize_read_enter();
2541 IFADDR_READER_FOREACH(ifa, ifp) {
2542 ifa_acquire(ifa, &psref);
2543 pserialize_read_exit(s);
2544
2545 pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2546
2547 s = pserialize_read_enter();
2548 ifa_release(ifa, &psref);
2549 }
2550 pserialize_read_exit(s);
2551 curlwp_bindx(bound);
2552
2553 IFQ_PURGE(&ifp->if_snd);
2554 #if NCARP > 0
2555 if (ifp->if_carp)
2556 carp_carpdev_state(ifp);
2557 #endif
2558 rt_ifmsg(ifp);
2559 DOMAIN_FOREACH(dp) {
2560 if (dp->dom_if_down)
2561 dp->dom_if_down(ifp);
2562 }
2563 }
2564
/*
 * Mark an already deactivated interface down.
 * Asserts that the interface is deactivated rather than that
 * IFNET_LOCK is held.
 */
static void
if_down_deactivated(struct ifnet *ifp)
{

	KASSERT(if_is_deactivated(ifp));

	_if_down(ifp);
}
2572
/*
 * Mark an interface down; the caller must already hold IFNET_LOCK
 * (asserted).
 */
void
if_down_locked(struct ifnet *ifp)
{

	KASSERT(IFNET_LOCKED(ifp));

	_if_down(ifp);
}
2580
/*
 * Mark an interface down and notify protocols of the transition.
 * Takes IFNET_LOCK around the operation, so the caller must not
 * already hold it (use if_down_locked() in that case).
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{

	IFNET_LOCK(ifp);
	if_down_locked(ifp);
	IFNET_UNLOCK(ifp);
}
2594
2595 /*
2596 * Must be called with holding if_ioctl_lock.
2597 */
2598 static void
if_up_locked(struct ifnet * ifp)2599 if_up_locked(struct ifnet *ifp)
2600 {
2601 #ifdef notyet
2602 struct ifaddr *ifa;
2603 #endif
2604 struct domain *dp;
2605
2606 KASSERT(IFNET_LOCKED(ifp));
2607
2608 KASSERT(!if_is_deactivated(ifp));
2609 ifp->if_flags |= IFF_UP;
2610 nanotime(&ifp->if_lastchange);
2611 #ifdef notyet
2612 /* this has no effect on IP, and will kill all ISO connections XXX */
2613 IFADDR_READER_FOREACH(ifa, ifp)
2614 pfctlinput(PRC_IFUP, ifa->ifa_addr);
2615 #endif
2616 #if NCARP > 0
2617 if (ifp->if_carp)
2618 carp_carpdev_state(ifp);
2619 #endif
2620 rt_ifmsg(ifp);
2621 DOMAIN_FOREACH(dp) {
2622 if (dp->dom_if_up)
2623 dp->dom_if_up(ifp);
2624 }
2625 }
2626
2627 /*
2628 * Handle interface slowtimo timer routine. Called
2629 * from softclock, we decrement timer (if set) and
2630 * call the appropriate interface routine on expiration.
2631 */
2632 static bool
if_slowtimo_countdown(struct ifnet * ifp)2633 if_slowtimo_countdown(struct ifnet *ifp)
2634 {
2635 bool fire = false;
2636 const int s = splnet();
2637
2638 KERNEL_LOCK(1, NULL);
2639 if (ifp->if_timer != 0 && --ifp->if_timer == 0)
2640 fire = true;
2641 KERNEL_UNLOCK_ONE(NULL);
2642 splx(s);
2643
2644 return fire;
2645 }
2646
2647 static void
if_slowtimo_intr(void * arg)2648 if_slowtimo_intr(void *arg)
2649 {
2650 struct ifnet *ifp = arg;
2651 struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
2652
2653 mutex_enter(&isd->isd_lock);
2654 if (!isd->isd_dying) {
2655 if (isd->isd_trigger || if_slowtimo_countdown(ifp)) {
2656 if (!isd->isd_queued) {
2657 isd->isd_queued = true;
2658 workqueue_enqueue(if_slowtimo_wq,
2659 &isd->isd_work, NULL);
2660 }
2661 } else
2662 callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
2663 }
2664 mutex_exit(&isd->isd_lock);
2665 }
2666
2667 static void
if_slowtimo_work(struct work * work,void * arg)2668 if_slowtimo_work(struct work *work, void *arg)
2669 {
2670 struct if_slowtimo_data *isd =
2671 container_of(work, struct if_slowtimo_data, isd_work);
2672 struct ifnet *ifp = isd->isd_ifp;
2673 const int s = splnet();
2674
2675 KERNEL_LOCK(1, NULL);
2676 (*ifp->if_slowtimo)(ifp);
2677 KERNEL_UNLOCK_ONE(NULL);
2678 splx(s);
2679
2680 mutex_enter(&isd->isd_lock);
2681 if (isd->isd_trigger) {
2682 isd->isd_trigger = false;
2683 printf("%s: watchdog triggered\n", ifp->if_xname);
2684 }
2685 isd->isd_queued = false;
2686 if (!isd->isd_dying)
2687 callout_schedule(&isd->isd_ch, hz / IFNET_SLOWHZ);
2688 mutex_exit(&isd->isd_lock);
2689 }
2690
2691 static int
sysctl_if_watchdog(SYSCTLFN_ARGS)2692 sysctl_if_watchdog(SYSCTLFN_ARGS)
2693 {
2694 struct sysctlnode node = *rnode;
2695 struct ifnet *ifp = node.sysctl_data;
2696 struct if_slowtimo_data *isd = ifp->if_slowtimo_data;
2697 int arg = 0;
2698 int error;
2699
2700 node.sysctl_data = &arg;
2701 error = sysctl_lookup(SYSCTLFN_CALL(&node));
2702 if (error || newp == NULL)
2703 return error;
2704 if (arg) {
2705 mutex_enter(&isd->isd_lock);
2706 KASSERT(!isd->isd_dying);
2707 isd->isd_trigger = true;
2708 callout_schedule(&isd->isd_ch, 0);
2709 mutex_exit(&isd->isd_lock);
2710 }
2711
2712 return 0;
2713 }
2714
2715 static void
sysctl_watchdog_setup(struct ifnet * ifp)2716 sysctl_watchdog_setup(struct ifnet *ifp)
2717 {
2718 struct sysctllog **clog = &ifp->if_sysctl_log;
2719 const struct sysctlnode *rnode;
2720
2721 if (sysctl_createv(clog, 0, NULL, &rnode,
2722 CTLFLAG_PERMANENT, CTLTYPE_NODE, "interfaces",
2723 SYSCTL_DESCR("Per-interface controls"),
2724 NULL, 0, NULL, 0,
2725 CTL_NET, CTL_CREATE, CTL_EOL) != 0)
2726 goto bad;
2727 if (sysctl_createv(clog, 0, &rnode, &rnode,
2728 CTLFLAG_PERMANENT, CTLTYPE_NODE, ifp->if_xname,
2729 SYSCTL_DESCR("Interface controls"),
2730 NULL, 0, NULL, 0,
2731 CTL_CREATE, CTL_EOL) != 0)
2732 goto bad;
2733 if (sysctl_createv(clog, 0, &rnode, &rnode,
2734 CTLFLAG_PERMANENT, CTLTYPE_NODE, "watchdog",
2735 SYSCTL_DESCR("Interface watchdog controls"),
2736 NULL, 0, NULL, 0,
2737 CTL_CREATE, CTL_EOL) != 0)
2738 goto bad;
2739 if (sysctl_createv(clog, 0, &rnode, NULL,
2740 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "trigger",
2741 SYSCTL_DESCR("Trigger watchdog timeout"),
2742 sysctl_if_watchdog, 0, (int *)ifp, 0,
2743 CTL_CREATE, CTL_EOL) != 0)
2744 goto bad;
2745
2746 return;
2747
2748 bad:
2749 printf("%s: could not attach sysctl watchdog nodes\n", ifp->if_xname);
2750 }
2751
/*
 * Mark an interface up and notify protocols of the transition.
 * Takes IFNET_LOCK around the operation, so the caller must not
 * already hold it.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{

	IFNET_LOCK(ifp);
	if_up_locked(ifp);
	IFNET_UNLOCK(ifp);
}
2765
2766 /*
2767 * Set/clear promiscuous mode on interface ifp based on the truth value
2768 * of pswitch. The calls are reference counted so that only the first
2769 * "on" request actually has an effect, as does the final "off" request.
2770 * Results are undefined if the "off" and "on" requests are not matched.
2771 */
2772 int
ifpromisc_locked(struct ifnet * ifp,int pswitch)2773 ifpromisc_locked(struct ifnet *ifp, int pswitch)
2774 {
2775 int pcount, ret = 0;
2776 u_short nflags;
2777
2778 KASSERT(IFNET_LOCKED(ifp));
2779
2780 pcount = ifp->if_pcount;
2781 if (pswitch) {
2782 /*
2783 * Allow the device to be "placed" into promiscuous
2784 * mode even if it is not configured up. It will
2785 * consult IFF_PROMISC when it is brought up.
2786 */
2787 if (ifp->if_pcount++ != 0)
2788 goto out;
2789 nflags = ifp->if_flags | IFF_PROMISC;
2790 } else {
2791 if (--ifp->if_pcount > 0)
2792 goto out;
2793 nflags = ifp->if_flags & ~IFF_PROMISC;
2794 }
2795 ret = if_flags_set(ifp, nflags);
2796 /* Restore interface state if not successful. */
2797 if (ret != 0)
2798 ifp->if_pcount = pcount;
2799
2800 out:
2801 return ret;
2802 }
2803
/*
 * Locking wrapper around ifpromisc_locked(): takes IFNET_LOCK, applies
 * the promiscuous mode request and returns its result.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int error;

	IFNET_LOCK(ifp);
	error = ifpromisc_locked(ifp, pswitch);
	IFNET_UNLOCK(ifp);

	return error;
}
2815
2816 /*
2817 * if_ioctl(ifp, cmd, data)
2818 *
2819 * Apply an ioctl command to the interface. Returns 0 on success,
2820 * nonzero errno(3) number on failure.
2821 *
2822 * For SIOCADDMULTI/SIOCDELMULTI, caller need not hold locks -- it
2823 * is the driver's responsibility to take any internal locks.
2824 * (Kernel logic should generally invoke these only through
2825 * if_mcast_op.)
2826 *
2827 * For all other ioctls, caller must hold ifp->if_ioctl_lock,
2828 * a.k.a. IFNET_LOCK. May sleep.
2829 */
2830 int
if_ioctl(struct ifnet * ifp,u_long cmd,void * data)2831 if_ioctl(struct ifnet *ifp, u_long cmd, void *data)
2832 {
2833
2834 switch (cmd) {
2835 case SIOCADDMULTI:
2836 case SIOCDELMULTI:
2837 break;
2838 default:
2839 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
2840 }
2841
2842 return (*ifp->if_ioctl)(ifp, cmd, data);
2843 }
2844
2845 /*
2846 * if_init(ifp)
2847 *
2848 * Prepare the hardware underlying ifp to process packets
2849 * according to its current configuration. Returns 0 on success,
2850 * nonzero errno(3) number on failure.
2851 *
2852 * May sleep. Caller must hold ifp->if_ioctl_lock, a.k.a
2853 * IFNET_LOCK.
2854 */
2855 int
if_init(struct ifnet * ifp)2856 if_init(struct ifnet *ifp)
2857 {
2858
2859 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
2860
2861 return (*ifp->if_init)(ifp);
2862 }
2863
2864 /*
2865 * if_stop(ifp, disable)
2866 *
2867 * Stop the hardware underlying ifp from processing packets.
2868 *
2869 * If disable is true, ... XXX(?)
2870 *
2871 * May sleep. Caller must hold ifp->if_ioctl_lock, a.k.a
2872 * IFNET_LOCK.
2873 */
2874 void
if_stop(struct ifnet * ifp,int disable)2875 if_stop(struct ifnet *ifp, int disable)
2876 {
2877
2878 KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
2879
2880 (*ifp->if_stop)(ifp, disable);
2881 }
2882
2883 /*
2884 * Map interface name to
2885 * interface structure pointer.
2886 */
2887 struct ifnet *
ifunit(const char * name)2888 ifunit(const char *name)
2889 {
2890 struct ifnet *ifp;
2891 const char *cp = name;
2892 u_int unit = 0;
2893 u_int i;
2894
2895 /*
2896 * If the entire name is a number, treat it as an ifindex.
2897 */
2898 for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++)
2899 unit = unit * 10 + (*cp - '0');
2900
2901 /*
2902 * If the number took all of the name, then it's a valid ifindex.
2903 */
2904 if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
2905 return if_byindex(unit);
2906
2907 ifp = NULL;
2908 const int s = pserialize_read_enter();
2909 IFNET_READER_FOREACH(ifp) {
2910 if (if_is_deactivated(ifp))
2911 continue;
2912 if (strcmp(ifp->if_xname, name) == 0)
2913 goto out;
2914 }
2915 out:
2916 pserialize_read_exit(s);
2917 return ifp;
2918 }
2919
2920 /*
2921 * Get a reference of an ifnet object by an interface name.
2922 * The returned reference is protected by psref(9). The caller
2923 * must release a returned reference by if_put after use.
2924 */
2925 struct ifnet *
if_get(const char * name,struct psref * psref)2926 if_get(const char *name, struct psref *psref)
2927 {
2928 struct ifnet *ifp;
2929 const char *cp = name;
2930 u_int unit = 0;
2931 u_int i;
2932
2933 /*
2934 * If the entire name is a number, treat it as an ifindex.
2935 */
2936 for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++)
2937 unit = unit * 10 + (*cp - '0');
2938
2939 /*
2940 * If the number took all of the name, then it's a valid ifindex.
2941 */
2942 if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
2943 return if_get_byindex(unit, psref);
2944
2945 ifp = NULL;
2946 const int s = pserialize_read_enter();
2947 IFNET_READER_FOREACH(ifp) {
2948 if (if_is_deactivated(ifp))
2949 continue;
2950 if (strcmp(ifp->if_xname, name) == 0) {
2951 PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
2952 psref_acquire(psref, &ifp->if_psref,
2953 ifnet_psref_class);
2954 goto out;
2955 }
2956 }
2957 out:
2958 pserialize_read_exit(s);
2959 return ifp;
2960 }
2961
2962 /*
2963 * Release a reference of an ifnet object given by if_get, if_get_byindex
2964 * or if_get_bylla.
2965 */
2966 void
if_put(const struct ifnet * ifp,struct psref * psref)2967 if_put(const struct ifnet *ifp, struct psref *psref)
2968 {
2969
2970 if (ifp == NULL)
2971 return;
2972
2973 psref_release(psref, &ifp->if_psref, ifnet_psref_class);
2974 }
2975
2976 /*
2977 * Return ifp having idx. Return NULL if not found. Normally if_byindex
2978 * should be used.
2979 */
2980 ifnet_t *
_if_byindex(u_int idx)2981 _if_byindex(u_int idx)
2982 {
2983
2984 return (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL;
2985 }
2986
2987 /*
2988 * Return ifp having idx. Return NULL if not found or the found ifp is
2989 * already deactivated.
2990 */
2991 ifnet_t *
if_byindex(u_int idx)2992 if_byindex(u_int idx)
2993 {
2994 ifnet_t *ifp;
2995
2996 ifp = _if_byindex(idx);
2997 if (ifp != NULL && if_is_deactivated(ifp))
2998 ifp = NULL;
2999 return ifp;
3000 }
3001
3002 /*
3003 * Get a reference of an ifnet object by an interface index.
3004 * The returned reference is protected by psref(9). The caller
3005 * must release a returned reference by if_put after use.
3006 */
3007 ifnet_t *
if_get_byindex(u_int idx,struct psref * psref)3008 if_get_byindex(u_int idx, struct psref *psref)
3009 {
3010 ifnet_t *ifp;
3011
3012 const int s = pserialize_read_enter();
3013 ifp = if_byindex(idx);
3014 if (__predict_true(ifp != NULL)) {
3015 PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
3016 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
3017 }
3018 pserialize_read_exit(s);
3019
3020 return ifp;
3021 }
3022
3023 ifnet_t *
if_get_bylla(const void * lla,unsigned char lla_len,struct psref * psref)3024 if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref)
3025 {
3026 ifnet_t *ifp;
3027
3028 const int s = pserialize_read_enter();
3029 IFNET_READER_FOREACH(ifp) {
3030 if (if_is_deactivated(ifp))
3031 continue;
3032 if (ifp->if_addrlen != lla_len)
3033 continue;
3034 if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) {
3035 psref_acquire(psref, &ifp->if_psref,
3036 ifnet_psref_class);
3037 break;
3038 }
3039 }
3040 pserialize_read_exit(s);
3041
3042 return ifp;
3043 }
3044
3045 /*
3046 * Note that it's safe only if the passed ifp is guaranteed to not be freed,
3047 * for example using pserialize or the ifp is already held or some other
3048 * object is held which guarantes the ifp to not be freed indirectly.
3049 */
3050 void
if_acquire(struct ifnet * ifp,struct psref * psref)3051 if_acquire(struct ifnet *ifp, struct psref *psref)
3052 {
3053
3054 KASSERT(ifp->if_index != 0);
3055 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
3056 }
3057
3058 bool
if_held(struct ifnet * ifp)3059 if_held(struct ifnet *ifp)
3060 {
3061
3062 return psref_held(&ifp->if_psref, ifnet_psref_class);
3063 }
3064
3065 /*
3066 * Some tunnel interfaces can nest, e.g. IPv4 over IPv4 gif(4) tunnel over
3067 * IPv4. Check the tunnel nesting count.
3068 * Return > 0, if tunnel nesting count is more than limit.
3069 * Return 0, if tunnel nesting count is equal or less than limit.
3070 */
3071 int
if_tunnel_check_nesting(struct ifnet * ifp,struct mbuf * m,int limit)3072 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, int limit)
3073 {
3074 struct m_tag *mtag;
3075 int *count;
3076
3077 mtag = m_tag_find(m, PACKET_TAG_TUNNEL_INFO);
3078 if (mtag != NULL) {
3079 count = (int *)(mtag + 1);
3080 if (++(*count) > limit) {
3081 log(LOG_NOTICE,
3082 "%s: recursively called too many times(%d)\n",
3083 ifp->if_xname, *count);
3084 return EIO;
3085 }
3086 } else {
3087 mtag = m_tag_get(PACKET_TAG_TUNNEL_INFO, sizeof(*count),
3088 M_NOWAIT);
3089 if (mtag != NULL) {
3090 m_tag_prepend(m, mtag);
3091 count = (int *)(mtag + 1);
3092 *count = 0;
3093 } else {
3094 log(LOG_DEBUG, "%s: m_tag_get() failed, "
3095 "recursion calls are not prevented.\n",
3096 ifp->if_xname);
3097 }
3098 }
3099
3100 return 0;
3101 }
3102
3103 static void
if_tunnel_ro_init_pc(void * p,void * arg __unused,struct cpu_info * ci __unused)3104 if_tunnel_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
3105 {
3106 struct tunnel_ro *tro = p;
3107
3108 tro->tr_ro = kmem_zalloc(sizeof(*tro->tr_ro), KM_SLEEP);
3109 tro->tr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
3110 }
3111
3112 static void
if_tunnel_ro_fini_pc(void * p,void * arg __unused,struct cpu_info * ci __unused)3113 if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
3114 {
3115 struct tunnel_ro *tro = p;
3116
3117 rtcache_free(tro->tr_ro);
3118 kmem_free(tro->tr_ro, sizeof(*tro->tr_ro));
3119
3120 mutex_obj_free(tro->tr_lock);
3121 }
3122
3123 percpu_t *
if_tunnel_alloc_ro_percpu(void)3124 if_tunnel_alloc_ro_percpu(void)
3125 {
3126
3127 return percpu_create(sizeof(struct tunnel_ro),
3128 if_tunnel_ro_init_pc, if_tunnel_ro_fini_pc, NULL);
3129 }
3130
3131 void
if_tunnel_free_ro_percpu(percpu_t * ro_percpu)3132 if_tunnel_free_ro_percpu(percpu_t *ro_percpu)
3133 {
3134
3135 percpu_free(ro_percpu, sizeof(struct tunnel_ro));
3136 }
3137
3138
3139 static void
if_tunnel_rtcache_free_pc(void * p,void * arg __unused,struct cpu_info * ci __unused)3140 if_tunnel_rtcache_free_pc(void *p, void *arg __unused,
3141 struct cpu_info *ci __unused)
3142 {
3143 struct tunnel_ro *tro = p;
3144
3145 mutex_enter(tro->tr_lock);
3146 rtcache_free(tro->tr_ro);
3147 mutex_exit(tro->tr_lock);
3148 }
3149
/*
 * Drop the cached route in every CPU's slot of ro_percpu.  The per-CPU
 * storage itself stays allocated.
 */
void
if_tunnel_ro_percpu_rtcache_free(percpu_t *ro_percpu)
{

	percpu_foreach(ro_percpu, if_tunnel_rtcache_free_pc, NULL);
}
3155
3156 void
if_export_if_data(ifnet_t * const ifp,struct if_data * ifi,bool zero_stats)3157 if_export_if_data(ifnet_t * const ifp, struct if_data *ifi, bool zero_stats)
3158 {
3159
3160 /* Collect the volatile stats first; this zeros *ifi. */
3161 if_stats_to_if_data(ifp, ifi, zero_stats);
3162
3163 ifi->ifi_type = ifp->if_type;
3164 ifi->ifi_addrlen = ifp->if_addrlen;
3165 ifi->ifi_hdrlen = ifp->if_hdrlen;
3166 ifi->ifi_link_state = ifp->if_link_state;
3167 ifi->ifi_mtu = ifp->if_mtu;
3168 ifi->ifi_metric = ifp->if_metric;
3169 ifi->ifi_baudrate = ifp->if_baudrate;
3170 ifi->ifi_lastchange = ifp->if_lastchange;
3171 }
3172
3173 /* common */
3174 int
ifioctl_common(struct ifnet * ifp,u_long cmd,void * data)3175 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data)
3176 {
3177 struct ifreq *ifr;
3178 struct ifcapreq *ifcr;
3179 struct ifdatareq *ifdr;
3180 unsigned short flags;
3181 char *descr;
3182 int error;
3183
3184 switch (cmd) {
3185 case SIOCSIFCAP:
3186 ifcr = data;
3187 if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0)
3188 return EINVAL;
3189
3190 if (ifcr->ifcr_capenable == ifp->if_capenable)
3191 return 0;
3192
3193 ifp->if_capenable = ifcr->ifcr_capenable;
3194
3195 /* Pre-compute the checksum flags mask. */
3196 ifp->if_csum_flags_tx = 0;
3197 ifp->if_csum_flags_rx = 0;
3198 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx)
3199 ifp->if_csum_flags_tx |= M_CSUM_IPv4;
3200 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx)
3201 ifp->if_csum_flags_rx |= M_CSUM_IPv4;
3202
3203 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx)
3204 ifp->if_csum_flags_tx |= M_CSUM_TCPv4;
3205 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx)
3206 ifp->if_csum_flags_rx |= M_CSUM_TCPv4;
3207
3208 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx)
3209 ifp->if_csum_flags_tx |= M_CSUM_UDPv4;
3210 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx)
3211 ifp->if_csum_flags_rx |= M_CSUM_UDPv4;
3212
3213 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx)
3214 ifp->if_csum_flags_tx |= M_CSUM_TCPv6;
3215 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx)
3216 ifp->if_csum_flags_rx |= M_CSUM_TCPv6;
3217
3218 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx)
3219 ifp->if_csum_flags_tx |= M_CSUM_UDPv6;
3220 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx)
3221 ifp->if_csum_flags_rx |= M_CSUM_UDPv6;
3222
3223 if (ifp->if_capenable & IFCAP_TSOv4)
3224 ifp->if_csum_flags_tx |= M_CSUM_TSOv4;
3225 if (ifp->if_capenable & IFCAP_TSOv6)
3226 ifp->if_csum_flags_tx |= M_CSUM_TSOv6;
3227
3228 #if NBRIDGE > 0
3229 if (ifp->if_bridge != NULL)
3230 bridge_calc_csum_flags(ifp->if_bridge);
3231 #endif
3232
3233 if (ifp->if_flags & IFF_UP)
3234 return ENETRESET;
3235 return 0;
3236 case SIOCSIFFLAGS:
3237 ifr = data;
3238 /*
3239 * If if_is_mpsafe(ifp), KERNEL_LOCK isn't held here, but if_up
3240 * and if_down aren't MP-safe yet, so we must hold the lock.
3241 */
3242 KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
3243 if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
3244 const int s = splsoftnet();
3245 if_down_locked(ifp);
3246 splx(s);
3247 }
3248 if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
3249 const int s = splsoftnet();
3250 if_up_locked(ifp);
3251 splx(s);
3252 }
3253 KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
3254 flags = (ifp->if_flags & IFF_CANTCHANGE) |
3255 (ifr->ifr_flags &~ IFF_CANTCHANGE);
3256 if (ifp->if_flags != flags) {
3257 ifp->if_flags = flags;
3258 /* Notify that the flags have changed. */
3259 rt_ifmsg(ifp);
3260 }
3261 break;
3262 case SIOCGIFFLAGS:
3263 ifr = data;
3264 ifr->ifr_flags = ifp->if_flags;
3265 break;
3266
3267 case SIOCGIFMETRIC:
3268 ifr = data;
3269 ifr->ifr_metric = ifp->if_metric;
3270 break;
3271
3272 case SIOCGIFMTU:
3273 ifr = data;
3274 ifr->ifr_mtu = ifp->if_mtu;
3275 break;
3276
3277 case SIOCGIFDLT:
3278 ifr = data;
3279 ifr->ifr_dlt = ifp->if_dlt;
3280 break;
3281
3282 case SIOCGIFCAP:
3283 ifcr = data;
3284 ifcr->ifcr_capabilities = ifp->if_capabilities;
3285 ifcr->ifcr_capenable = ifp->if_capenable;
3286 break;
3287
3288 case SIOCSIFMETRIC:
3289 ifr = data;
3290 ifp->if_metric = ifr->ifr_metric;
3291 break;
3292
3293 case SIOCGIFDATA:
3294 ifdr = data;
3295 if_export_if_data(ifp, &ifdr->ifdr_data, false);
3296 break;
3297
3298 case SIOCGIFINDEX:
3299 ifr = data;
3300 ifr->ifr_index = ifp->if_index;
3301 break;
3302
3303 case SIOCZIFDATA:
3304 ifdr = data;
3305 if_export_if_data(ifp, &ifdr->ifdr_data, true);
3306 getnanotime(&ifp->if_lastchange);
3307 break;
3308 case SIOCSIFMTU:
3309 ifr = data;
3310 if (ifp->if_mtu == ifr->ifr_mtu)
3311 break;
3312 ifp->if_mtu = ifr->ifr_mtu;
3313 return ENETRESET;
3314 case SIOCSIFDESCR:
3315 error = kauth_authorize_network(kauth_cred_get(),
3316 KAUTH_NETWORK_INTERFACE,
3317 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
3318 NULL);
3319 if (error)
3320 return error;
3321
3322 ifr = data;
3323
3324 if (ifr->ifr_buflen > IFDESCRSIZE)
3325 return ENAMETOOLONG;
3326
3327 if (ifr->ifr_buf == NULL || ifr->ifr_buflen == 0) {
3328 /* unset description */
3329 descr = NULL;
3330 } else {
3331 descr = kmem_zalloc(IFDESCRSIZE, KM_SLEEP);
3332 /*
3333 * copy (IFDESCRSIZE - 1) bytes to ensure
3334 * terminating nul
3335 */
3336 error = copyin(ifr->ifr_buf, descr, IFDESCRSIZE - 1);
3337 if (error) {
3338 kmem_free(descr, IFDESCRSIZE);
3339 return error;
3340 }
3341 }
3342
3343 if (ifp->if_description != NULL)
3344 kmem_free(ifp->if_description, IFDESCRSIZE);
3345
3346 ifp->if_description = descr;
3347 break;
3348
3349 case SIOCGIFDESCR:
3350 ifr = data;
3351 descr = ifp->if_description;
3352
3353 if (descr == NULL)
3354 return ENOMSG;
3355
3356 if (ifr->ifr_buflen < IFDESCRSIZE)
3357 return EINVAL;
3358
3359 error = copyout(descr, ifr->ifr_buf, IFDESCRSIZE);
3360 if (error)
3361 return error;
3362 break;
3363
3364 default:
3365 return ENOTTY;
3366 }
3367 return 0;
3368 }
3369
3370 int
ifaddrpref_ioctl(struct socket * so,u_long cmd,void * data,struct ifnet * ifp)3371 ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
3372 {
3373 struct if_addrprefreq *ifap = (struct if_addrprefreq *)data;
3374 struct ifaddr *ifa;
3375 const struct sockaddr *any, *sa;
3376 union {
3377 struct sockaddr sa;
3378 struct sockaddr_storage ss;
3379 } u, v;
3380 int s, error = 0;
3381
3382 switch (cmd) {
3383 case SIOCSIFADDRPREF:
3384 error = kauth_authorize_network(kauth_cred_get(),
3385 KAUTH_NETWORK_INTERFACE,
3386 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
3387 NULL);
3388 if (error)
3389 return error;
3390 break;
3391 case SIOCGIFADDRPREF:
3392 break;
3393 default:
3394 return EOPNOTSUPP;
3395 }
3396
3397 /* sanity checks */
3398 if (data == NULL || ifp == NULL) {
3399 panic("invalid argument to %s", __func__);
3400 /*NOTREACHED*/
3401 }
3402
3403 /* address must be specified on ADD and DELETE */
3404 sa = sstocsa(&ifap->ifap_addr);
3405 if (sa->sa_family != sofamily(so))
3406 return EINVAL;
3407 if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len)
3408 return EINVAL;
3409
3410 sockaddr_externalize(&v.sa, sizeof(v.ss), sa);
3411
3412 s = pserialize_read_enter();
3413 IFADDR_READER_FOREACH(ifa, ifp) {
3414 if (ifa->ifa_addr->sa_family != sa->sa_family)
3415 continue;
3416 sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr);
3417 if (sockaddr_cmp(&u.sa, &v.sa) == 0)
3418 break;
3419 }
3420 if (ifa == NULL) {
3421 error = EADDRNOTAVAIL;
3422 goto out;
3423 }
3424
3425 switch (cmd) {
3426 case SIOCSIFADDRPREF:
3427 ifa->ifa_preference = ifap->ifap_preference;
3428 goto out;
3429 case SIOCGIFADDRPREF:
3430 /* fill in the if_laddrreq structure */
3431 (void)sockaddr_copy(sstosa(&ifap->ifap_addr),
3432 sizeof(ifap->ifap_addr), ifa->ifa_addr);
3433 ifap->ifap_preference = ifa->ifa_preference;
3434 goto out;
3435 default:
3436 error = EOPNOTSUPP;
3437 }
3438 out:
3439 pserialize_read_exit(s);
3440 return error;
3441 }
3442
3443 /*
3444 * Interface ioctls.
3445 */
3446 static int
doifioctl(struct socket * so,u_long cmd,void * data,struct lwp * l)3447 doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l)
3448 {
3449 struct ifnet *ifp;
3450 struct ifreq *ifr;
3451 int error = 0;
3452 u_long ocmd = cmd;
3453 u_short oif_flags;
3454 struct ifreq ifrb;
3455 struct oifreq *oifr = NULL;
3456 int r;
3457 struct psref psref;
3458 bool do_if43_post = false;
3459 bool do_ifm80_post = false;
3460
3461 switch (cmd) {
3462 case SIOCGIFCONF:
3463 return ifconf(cmd, data);
3464 case SIOCINITIFADDR:
3465 return EPERM;
3466 default:
3467 MODULE_HOOK_CALL(uipc_syscalls_40_hook, (cmd, data), enosys(),
3468 error);
3469 if (error != ENOSYS)
3470 return error;
3471 MODULE_HOOK_CALL(uipc_syscalls_50_hook, (l, cmd, data),
3472 enosys(), error);
3473 if (error != ENOSYS)
3474 return error;
3475 error = 0;
3476 break;
3477 }
3478
3479 ifr = data;
3480 /* Pre-conversion */
3481 MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), error);
3482 if (cmd != ocmd) {
3483 oifr = data;
3484 data = ifr = &ifrb;
3485 IFREQO2N_43(oifr, ifr);
3486 do_if43_post = true;
3487 }
3488 MODULE_HOOK_CALL(ifmedia_80_pre_hook, (ifr, &cmd, &do_ifm80_post),
3489 enosys(), error);
3490
3491 switch (cmd) {
3492 case SIOCIFCREATE:
3493 case SIOCIFDESTROY: {
3494 const int bound = curlwp_bind();
3495 if (l != NULL) {
3496 ifp = if_get(ifr->ifr_name, &psref);
3497 error = kauth_authorize_network(l->l_cred,
3498 KAUTH_NETWORK_INTERFACE,
3499 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
3500 KAUTH_ARG(cmd), NULL);
3501 if (ifp != NULL)
3502 if_put(ifp, &psref);
3503 if (error != 0) {
3504 curlwp_bindx(bound);
3505 return error;
3506 }
3507 }
3508 KERNEL_LOCK_UNLESS_NET_MPSAFE();
3509 mutex_enter(&if_clone_mtx);
3510 r = (cmd == SIOCIFCREATE) ?
3511 if_clone_create(ifr->ifr_name) :
3512 if_clone_destroy(ifr->ifr_name);
3513 mutex_exit(&if_clone_mtx);
3514 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
3515 curlwp_bindx(bound);
3516 return r;
3517 }
3518 case SIOCIFGCLONERS: {
3519 struct if_clonereq *req = (struct if_clonereq *)data;
3520 return if_clone_list(req->ifcr_count, req->ifcr_buffer,
3521 &req->ifcr_total);
3522 }
3523 }
3524
3525 if ((cmd & IOC_IN) == 0 || IOCPARM_LEN(cmd) < sizeof(ifr->ifr_name))
3526 return EINVAL;
3527
3528 const int bound = curlwp_bind();
3529 ifp = if_get(ifr->ifr_name, &psref);
3530 if (ifp == NULL) {
3531 curlwp_bindx(bound);
3532 return ENXIO;
3533 }
3534
3535 switch (cmd) {
3536 case SIOCALIFADDR:
3537 case SIOCDLIFADDR:
3538 case SIOCSIFADDRPREF:
3539 case SIOCSIFFLAGS:
3540 case SIOCSIFCAP:
3541 case SIOCSIFMETRIC:
3542 case SIOCZIFDATA:
3543 case SIOCSIFMTU:
3544 case SIOCSIFPHYADDR:
3545 case SIOCDIFPHYADDR:
3546 #ifdef INET6
3547 case SIOCSIFPHYADDR_IN6:
3548 #endif
3549 case SIOCSLIFPHYADDR:
3550 case SIOCADDMULTI:
3551 case SIOCDELMULTI:
3552 case SIOCSETHERCAP:
3553 case SIOCSIFMEDIA:
3554 case SIOCSDRVSPEC:
3555 case SIOCG80211:
3556 case SIOCS80211:
3557 case SIOCS80211NWID:
3558 case SIOCS80211NWKEY:
3559 case SIOCS80211POWER:
3560 case SIOCS80211BSSID:
3561 case SIOCS80211CHANNEL:
3562 case SIOCSLINKSTR:
3563 if (l != NULL) {
3564 error = kauth_authorize_network(l->l_cred,
3565 KAUTH_NETWORK_INTERFACE,
3566 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
3567 KAUTH_ARG(cmd), NULL);
3568 if (error != 0)
3569 goto out;
3570 }
3571 }
3572
3573 oif_flags = ifp->if_flags;
3574
3575 KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp);
3576 IFNET_LOCK(ifp);
3577
3578 error = if_ioctl(ifp, cmd, data);
3579 if (error != ENOTTY)
3580 ;
3581 else if (so->so_proto == NULL)
3582 error = EOPNOTSUPP;
3583 else {
3584 KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
3585 MODULE_HOOK_CALL(if_ifioctl_43_hook,
3586 (so, ocmd, cmd, data, l), enosys(), error);
3587 if (error == ENOSYS)
3588 error = (*so->so_proto->pr_usrreqs->pr_ioctl)(so,
3589 cmd, data, ifp);
3590 KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
3591 }
3592
3593 if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) {
3594 if ((ifp->if_flags & IFF_UP) != 0) {
3595 const int s = splsoftnet();
3596 if_up_locked(ifp);
3597 splx(s);
3598 }
3599 }
3600
3601 /* Post-conversion */
3602 if (do_ifm80_post && (error == 0))
3603 MODULE_HOOK_CALL(ifmedia_80_post_hook, (ifr, cmd),
3604 enosys(), error);
3605 if (do_if43_post)
3606 IFREQN2O_43(oifr, ifr);
3607
3608 IFNET_UNLOCK(ifp);
3609 KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp);
3610 out:
3611 if_put(ifp, &psref);
3612 curlwp_bindx(bound);
3613 return error;
3614 }
3615
3616 /*
3617 * Return interface configuration
3618 * of system. List may be used
3619 * in later ioctl's (above) to get
3620 * other information.
3621 *
3622 * Each record is a struct ifreq. Before the addition of
3623 * sockaddr_storage, the API rule was that sockaddr flavors that did
3624 * not fit would extend beyond the struct ifreq, with the next struct
3625 * ifreq starting sa_len beyond the struct sockaddr. Because the
3626 * union in struct ifreq includes struct sockaddr_storage, every kind
3627 * of sockaddr must fit. Thus, there are no longer any overlength
3628 * records.
3629 *
3630 * Records are added to the user buffer if they fit, and ifc_len is
3631 * adjusted to the length that was written. Thus, the user is only
3632 * assured of getting the complete list if ifc_len on return is at
3633 * least sizeof(struct ifreq) less than it was on entry.
3634 *
3635 * If the user buffer pointer is NULL, this routine copies no data and
3636 * returns the amount of space that would be needed.
3637 *
3638 * Invariants:
3639 * ifrp points to the next part of the user's buffer to be used. If
3640 * ifrp != NULL, space holds the number of bytes remaining that we may
3641 * write at ifrp. Otherwise, space holds the number of bytes that
3642 * would have been written had there been adequate space.
3643 */
3644 /*ARGSUSED*/
3645 static int
ifconf(u_long cmd,void * data)3646 ifconf(u_long cmd, void *data)
3647 {
3648 struct ifconf *ifc = (struct ifconf *)data;
3649 struct ifnet *ifp;
3650 struct ifaddr *ifa;
3651 struct ifreq ifr, *ifrp = NULL;
3652 int space = 0, error = 0;
3653 const int sz = (int)sizeof(struct ifreq);
3654 const bool docopy = ifc->ifc_req != NULL;
3655 struct psref psref;
3656
3657 if (docopy) {
3658 if (ifc->ifc_len < 0)
3659 return EINVAL;
3660
3661 space = ifc->ifc_len;
3662 ifrp = ifc->ifc_req;
3663 }
3664 memset(&ifr, 0, sizeof(ifr));
3665
3666 const int bound = curlwp_bind();
3667 int s = pserialize_read_enter();
3668 IFNET_READER_FOREACH(ifp) {
3669 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
3670 pserialize_read_exit(s);
3671
3672 (void)strncpy(ifr.ifr_name, ifp->if_xname,
3673 sizeof(ifr.ifr_name));
3674 if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') {
3675 error = ENAMETOOLONG;
3676 goto release_exit;
3677 }
3678 if (IFADDR_READER_EMPTY(ifp)) {
3679 /* Interface with no addresses - send zero sockaddr. */
3680 memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr));
3681 if (!docopy) {
3682 space += sz;
3683 goto next;
3684 }
3685 if (space >= sz) {
3686 error = copyout(&ifr, ifrp, sz);
3687 if (error != 0)
3688 goto release_exit;
3689 ifrp++;
3690 space -= sz;
3691 }
3692 }
3693
3694 s = pserialize_read_enter();
3695 IFADDR_READER_FOREACH(ifa, ifp) {
3696 struct sockaddr *sa = ifa->ifa_addr;
3697 /* all sockaddrs must fit in sockaddr_storage */
3698 KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru));
3699
3700 if (!docopy) {
3701 space += sz;
3702 continue;
3703 }
3704 memcpy(&ifr.ifr_space, sa, sa->sa_len);
3705 pserialize_read_exit(s);
3706
3707 if (space >= sz) {
3708 error = copyout(&ifr, ifrp, sz);
3709 if (error != 0)
3710 goto release_exit;
3711 ifrp++; space -= sz;
3712 }
3713 s = pserialize_read_enter();
3714 }
3715 pserialize_read_exit(s);
3716
3717 next:
3718 s = pserialize_read_enter();
3719 psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
3720 }
3721 pserialize_read_exit(s);
3722 curlwp_bindx(bound);
3723
3724 if (docopy) {
3725 KASSERT(0 <= space && space <= ifc->ifc_len);
3726 ifc->ifc_len -= space;
3727 } else {
3728 KASSERT(space >= 0);
3729 ifc->ifc_len = space;
3730 }
3731 return 0;
3732
3733 release_exit:
3734 psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
3735 curlwp_bindx(bound);
3736 return error;
3737 }
3738
3739 int
ifreq_setaddr(u_long cmd,struct ifreq * ifr,const struct sockaddr * sa)3740 ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa)
3741 {
3742 uint8_t len = sizeof(ifr->ifr_ifru.ifru_space);
3743 struct ifreq ifrb;
3744 struct oifreq *oifr = NULL;
3745 u_long ocmd = cmd;
3746 int hook;
3747
3748 MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), hook);
3749 if (hook != ENOSYS) {
3750 if (cmd != ocmd) {
3751 oifr = (struct oifreq *)(void *)ifr;
3752 ifr = &ifrb;
3753 IFREQO2N_43(oifr, ifr);
3754 len = sizeof(oifr->ifr_addr);
3755 }
3756 }
3757
3758 if (len < sa->sa_len)
3759 return EFBIG;
3760
3761 memset(&ifr->ifr_addr, 0, len);
3762 sockaddr_copy(&ifr->ifr_addr, len, sa);
3763
3764 if (cmd != ocmd)
3765 IFREQN2O_43(oifr, ifr);
3766 return 0;
3767 }
3768
3769 /*
3770 * wrapper function for the drivers which doesn't have if_transmit().
3771 */
3772 static int
if_transmit(struct ifnet * ifp,struct mbuf * m)3773 if_transmit(struct ifnet *ifp, struct mbuf *m)
3774 {
3775 int error;
3776 size_t pktlen = m->m_pkthdr.len;
3777 bool mcast = (m->m_flags & M_MCAST) != 0;
3778
3779 const int s = splnet();
3780
3781 IFQ_ENQUEUE(&ifp->if_snd, m, error);
3782 if (error != 0) {
3783 /* mbuf is already freed */
3784 goto out;
3785 }
3786
3787 net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
3788 if_statadd_ref(nsr, if_obytes, pktlen);
3789 if (mcast)
3790 if_statinc_ref(nsr, if_omcasts);
3791 IF_STAT_PUTREF(ifp);
3792
3793 if ((ifp->if_flags & IFF_OACTIVE) == 0)
3794 if_start_lock(ifp);
3795 out:
3796 splx(s);
3797
3798 return error;
3799 }
3800
3801 int
if_transmit_lock(struct ifnet * ifp,struct mbuf * m)3802 if_transmit_lock(struct ifnet *ifp, struct mbuf *m)
3803 {
3804 int error;
3805
3806 kmsan_check_mbuf(m);
3807
3808 #ifdef ALTQ
3809 KERNEL_LOCK(1, NULL);
3810 if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
3811 error = if_transmit(ifp, m);
3812 KERNEL_UNLOCK_ONE(NULL);
3813 } else {
3814 KERNEL_UNLOCK_ONE(NULL);
3815 error = (*ifp->if_transmit)(ifp, m);
3816 /* mbuf is already freed */
3817 }
3818 #else /* !ALTQ */
3819 error = (*ifp->if_transmit)(ifp, m);
3820 /* mbuf is already freed */
3821 #endif /* !ALTQ */
3822
3823 return error;
3824 }
3825
/*
 * Queue message on interface for output.
 *
 * This is a thin wrapper: the packet is forwarded to
 * if_transmit_lock() and that function's result is returned.
 */
int
ifq_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	const int error = if_transmit_lock(ifp, m);

	return error;
}
3836
3837 /*
3838 * Queue message on interface, possibly using a second fast queue
3839 */
3840 int
ifq_enqueue2(struct ifnet * ifp,struct ifqueue * ifq,struct mbuf * m)3841 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m)
3842 {
3843 int error = 0;
3844
3845 if (ifq != NULL
3846 #ifdef ALTQ
3847 && ALTQ_IS_ENABLED(&ifp->if_snd) == 0
3848 #endif
3849 ) {
3850 if (IF_QFULL(ifq)) {
3851 IF_DROP(&ifp->if_snd);
3852 m_freem(m);
3853 if (error == 0)
3854 error = ENOBUFS;
3855 } else
3856 IF_ENQUEUE(ifq, m);
3857 } else
3858 IFQ_ENQUEUE(&ifp->if_snd, m, error);
3859 if (error != 0) {
3860 if_statinc(ifp, if_oerrors);
3861 return error;
3862 }
3863 return 0;
3864 }
3865
3866 int
if_addr_init(ifnet_t * ifp,struct ifaddr * ifa,const bool src)3867 if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src)
3868 {
3869 int rc;
3870
3871 KASSERT(IFNET_LOCKED(ifp));
3872 if (ifp->if_initaddr != NULL)
3873 rc = (*ifp->if_initaddr)(ifp, ifa, src);
3874 else if (src || (rc = if_ioctl(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY)
3875 rc = if_ioctl(ifp, SIOCINITIFADDR, ifa);
3876
3877 return rc;
3878 }
3879
3880 int
if_do_dad(struct ifnet * ifp)3881 if_do_dad(struct ifnet *ifp)
3882 {
3883 if ((ifp->if_flags & IFF_LOOPBACK) != 0)
3884 return 0;
3885
3886 switch (ifp->if_type) {
3887 case IFT_FAITH:
3888 /*
3889 * These interfaces do not have the IFF_LOOPBACK flag,
3890 * but loop packets back. We do not have to do DAD on such
3891 * interfaces. We should even omit it, because loop-backed
3892 * responses would confuse the DAD procedure.
3893 */
3894 return 0;
3895 default:
3896 /*
3897 * Our DAD routine requires the interface up and running.
3898 * However, some interfaces can be up before the RUNNING
3899 * status. Additionally, users may try to assign addresses
3900 * before the interface becomes up (or running).
3901 * We simply skip DAD in such a case as a work around.
3902 * XXX: we should rather mark "tentative" on such addresses,
3903 * and do DAD after the interface becomes ready.
3904 */
3905 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
3906 (IFF_UP | IFF_RUNNING))
3907 return 0;
3908
3909 return 1;
3910 }
3911 }
3912
3913 /*
3914 * if_flags_set(ifp, flags)
3915 *
3916 * Ask ifp to change ifp->if_flags to flags, as if with the
3917 * SIOCSIFFLAGS ioctl command.
3918 *
3919 * May sleep. Caller must hold ifp->if_ioctl_lock, a.k.a
3920 * IFNET_LOCK.
3921 */
3922 int
if_flags_set(ifnet_t * ifp,const u_short flags)3923 if_flags_set(ifnet_t *ifp, const u_short flags)
3924 {
3925 int rc;
3926
3927 KASSERT(IFNET_LOCKED(ifp));
3928
3929 if (ifp->if_setflags != NULL)
3930 rc = (*ifp->if_setflags)(ifp, flags);
3931 else {
3932 u_short cantflags, chgdflags;
3933 struct ifreq ifr;
3934
3935 chgdflags = ifp->if_flags ^ flags;
3936 cantflags = chgdflags & IFF_CANTCHANGE;
3937
3938 if (cantflags != 0)
3939 ifp->if_flags ^= cantflags;
3940
3941 /*
3942 * Traditionally, we do not call if_ioctl after
3943 * setting/clearing only IFF_PROMISC if the interface
3944 * isn't IFF_UP. Uphold that tradition.
3945 */
3946 if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0)
3947 return 0;
3948
3949 memset(&ifr, 0, sizeof(ifr));
3950
3951 ifr.ifr_flags = flags & ~IFF_CANTCHANGE;
3952 rc = if_ioctl(ifp, SIOCSIFFLAGS, &ifr);
3953
3954 if (rc != 0 && cantflags != 0)
3955 ifp->if_flags ^= cantflags;
3956 }
3957
3958 return rc;
3959 }
3960
3961 /*
3962 * if_mcast_op(ifp, cmd, sa)
3963 *
3964 * Apply a multicast command, SIOCADDMULTI/SIOCDELMULTI, to the
3965 * interface. Returns 0 on success, nonzero errno(3) number on
3966 * failure.
3967 *
3968 * May sleep.
3969 *
3970 * Use this, not if_ioctl, for the multicast commands.
3971 */
3972 int
if_mcast_op(ifnet_t * ifp,const unsigned long cmd,const struct sockaddr * sa)3973 if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa)
3974 {
3975 int rc;
3976 struct ifreq ifr;
3977
3978 switch (cmd) {
3979 case SIOCADDMULTI:
3980 case SIOCDELMULTI:
3981 break;
3982 default:
3983 panic("invalid ifnet multicast command: 0x%lx", cmd);
3984 }
3985
3986 ifreq_setaddr(cmd, &ifr, sa);
3987 rc = if_ioctl(ifp, cmd, &ifr);
3988
3989 return rc;
3990 }
3991
3992 static void
sysctl_sndq_setup(struct sysctllog ** clog,const char * ifname,struct ifaltq * ifq)3993 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname,
3994 struct ifaltq *ifq)
3995 {
3996 const struct sysctlnode *cnode, *rnode;
3997
3998 if (sysctl_createv(clog, 0, NULL, &rnode,
3999 CTLFLAG_PERMANENT,
4000 CTLTYPE_NODE, "interfaces",
4001 SYSCTL_DESCR("Per-interface controls"),
4002 NULL, 0, NULL, 0,
4003 CTL_NET, CTL_CREATE, CTL_EOL) != 0)
4004 goto bad;
4005
4006 if (sysctl_createv(clog, 0, &rnode, &rnode,
4007 CTLFLAG_PERMANENT,
4008 CTLTYPE_NODE, ifname,
4009 SYSCTL_DESCR("Interface controls"),
4010 NULL, 0, NULL, 0,
4011 CTL_CREATE, CTL_EOL) != 0)
4012 goto bad;
4013
4014 if (sysctl_createv(clog, 0, &rnode, &rnode,
4015 CTLFLAG_PERMANENT,
4016 CTLTYPE_NODE, "sndq",
4017 SYSCTL_DESCR("Interface output queue controls"),
4018 NULL, 0, NULL, 0,
4019 CTL_CREATE, CTL_EOL) != 0)
4020 goto bad;
4021
4022 if (sysctl_createv(clog, 0, &rnode, &cnode,
4023 CTLFLAG_PERMANENT,
4024 CTLTYPE_INT, "len",
4025 SYSCTL_DESCR("Current output queue length"),
4026 NULL, 0, &ifq->ifq_len, 0,
4027 CTL_CREATE, CTL_EOL) != 0)
4028 goto bad;
4029
4030 if (sysctl_createv(clog, 0, &rnode, &cnode,
4031 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
4032 CTLTYPE_INT, "maxlen",
4033 SYSCTL_DESCR("Maximum allowed output queue length"),
4034 NULL, 0, &ifq->ifq_maxlen, 0,
4035 CTL_CREATE, CTL_EOL) != 0)
4036 goto bad;
4037
4038 if (sysctl_createv(clog, 0, &rnode, &cnode,
4039 CTLFLAG_PERMANENT,
4040 CTLTYPE_QUAD, "drops",
4041 SYSCTL_DESCR("Packets dropped due to full output queue"),
4042 NULL, 0, &ifq->ifq_drops, 0,
4043 CTL_CREATE, CTL_EOL) != 0)
4044 goto bad;
4045
4046 return;
4047 bad:
4048 printf("%s: could not attach sysctl nodes\n", ifname);
4049 return;
4050 }
4051
4052 static int
if_sdl_sysctl(SYSCTLFN_ARGS)4053 if_sdl_sysctl(SYSCTLFN_ARGS)
4054 {
4055 struct ifnet *ifp;
4056 const struct sockaddr_dl *sdl;
4057 struct psref psref;
4058 int error = 0;
4059
4060 if (namelen != 1)
4061 return EINVAL;
4062
4063 const int bound = curlwp_bind();
4064 ifp = if_get_byindex(name[0], &psref);
4065 if (ifp == NULL) {
4066 error = ENODEV;
4067 goto out0;
4068 }
4069
4070 sdl = ifp->if_sadl;
4071 if (sdl == NULL) {
4072 *oldlenp = 0;
4073 goto out1;
4074 }
4075
4076 if (oldp == NULL) {
4077 *oldlenp = sdl->sdl_alen;
4078 goto out1;
4079 }
4080
4081 if (*oldlenp >= sdl->sdl_alen)
4082 *oldlenp = sdl->sdl_alen;
4083 error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen],
4084 oldp, *oldlenp);
4085 out1:
4086 if_put(ifp, &psref);
4087 out0:
4088 curlwp_bindx(bound);
4089 return error;
4090 }
4091
4092 static void
if_sysctl_setup(struct sysctllog ** clog)4093 if_sysctl_setup(struct sysctllog **clog)
4094 {
4095 const struct sysctlnode *rnode = NULL;
4096
4097 sysctl_createv(clog, 0, NULL, &rnode,
4098 CTLFLAG_PERMANENT,
4099 CTLTYPE_NODE, "sdl",
4100 SYSCTL_DESCR("Get active link-layer address"),
4101 if_sdl_sysctl, 0, NULL, 0,
4102 CTL_NET, CTL_CREATE, CTL_EOL);
4103 }
4104