1 /* LWIP service - ifdev.c - network interface devices */
2
3 #include "lwip.h"
4 #include "mcast.h"
5 #include "ifaddr.h"
6 #include "rtsock.h"
7 #include "route.h"
8 #include "bpfdev.h"
9
10 #include <net/if_media.h>
11
/*
 * Number of slots in the index-based interface lookup table.  We currently let
 * lwIP choose the interface index.  lwIP stores that index in an unsigned
 * 8-bit field and generates values between 1 and 255 inclusive.  The lookup
 * table is indexed by the interface index itself, so it must have one slot for
 * every value that the 8-bit field can hold.
 */
#define MAX_IFDEV	(UINT8_MAX + 1)

/*
 * Index-based lookup table.  An interface with index N is stored in slot N
 * (see ifdev_add() and ifdev_get_by_index()).  Slot zero is never filled in,
 * because zero is not a valid interface index.
 */
static struct ifdev *ifdev_table[MAX_IFDEV];

/* List of active interfaces, kept in the order chosen by ifdev_insert(). */
static TAILQ_HEAD(, ifdev) ifdev_list;

/* The loopback interface used to loop back packets of other interfaces. */
static struct ifdev *ifdev_loopback;
28
/*
 * Capacity of the virtual interface type table.  Virtual interface types are
 * types of which interfaces may be created and destroyed dynamically; the BSDs
 * call these "clones".  Each such type is registered by its own module through
 * ifdev_register(), which panics when the table is full.  Increase this value
 * as necessary.
 */
#define MAX_VTYPE	4

/* Table of registered virtual interface types. */
static struct {
	const char *ifvt_name;		/* name prefix without digits, e.g. "lo" */
	size_t ifvt_namelen;		/* strlen() of the name prefix */
	int (*ifvt_create)(const char *);	/* interface creation function */
} ifdev_vtype[MAX_VTYPE];

/* Number of ifdev_vtype[] slots that are currently in use. */
static unsigned int ifdev_vtypes;

/* The smallest MTU we accept for an interface, as required by IPv6. */
#define IFDEV_MIN_MTU	1280
47
48 /*
49 * Initialize the network interface devices module. This call must be issued
50 * before any virtual interfaces are initialized, because the virtual types
51 * array is initialized here.
52 */
53 void
ifdev_init(void)54 ifdev_init(void)
55 {
56
57 memset(ifdev_table, 0, sizeof(ifdev_table));
58
59 TAILQ_INIT(&ifdev_list);
60
61 memset(ifdev_vtype, 0, sizeof(ifdev_vtype));
62 ifdev_vtypes = 0;
63 }
64
65 /*
66 * Check all active interfaces to see if any tasks need to be performed. This
67 * function is called as part of each message loop iteration.
68 */
69 void
ifdev_poll(void)70 ifdev_poll(void)
71 {
72 struct ifdev *ifdev;
73
74 /*
75 * Call the polling function of the active interfaces. Note that
76 * interfaces may not remove themselves as a result of polling!
77 */
78 TAILQ_FOREACH(ifdev, &ifdev_list, ifdev_next) {
79 if (ifdev->ifdev_ops->iop_poll != NULL)
80 ifdev->ifdev_ops->iop_poll(ifdev);
81 }
82 }
83
84 /*
85 * Handle an incoming packet on an interface. This function assumes ownership
86 * of the packet buffers: the caller must no longer refer to it afterward. For
87 * packets looped back for a non-loopback interface, 'ifdev' is the loopback
88 * interface and 'netif' is the original (non-loopback) interface's netif. For
89 * other packets, 'ifdev' is the actual interface and 'netif' is NULL. The
90 * packet is passed to BPF devices only if 'to_bpf' is set.
91 */
92 void
ifdev_input(struct ifdev * ifdev,struct pbuf * pbuf,struct netif * netif,int to_bpf)93 ifdev_input(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
94 int to_bpf)
95 {
96 struct bpfdev_link *bpfl;
97 err_t err;
98
99 /*
100 * Looped-back packets are captured on the loopback device, not on the
101 * original interface. Similarly, we account the traffic to the
102 * loopback interface. This is a policy decision (inspired by NetBSD's
103 * behavior) and may be changed later.
104 */
105 if (to_bpf) {
106 TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
107 bpfdev_input(bpfl, pbuf);
108 }
109
110 ifdev->ifdev_data.ifi_ipackets++;
111 ifdev->ifdev_data.ifi_ibytes += pbuf->tot_len;
112
113 if (pbuf->flags & PBUF_FLAG_LLMCAST)
114 ifdev->ifdev_data.ifi_imcasts++;
115
116 /*
117 * For looped-back packets, we must bypass the regular netif input
118 * function (as that one is for link-layer packet handling) and instead
119 * pass it directly to the IP-layer packet handling function of lwIP.
120 */
121 if (netif != NULL)
122 err = ip_input(pbuf, netif);
123 else
124 err = ifdev->ifdev_netif.input(pbuf, &ifdev->ifdev_netif);
125
126 if (err != ERR_OK)
127 pbuf_free(pbuf);
128 }
129
130 /*
131 * Handle an outgoing packet on an interface. Return ERR_OK if the packet was
132 * transmitted or another lwIP ERR_ error code upon failure. Either way, the
133 * caller is responsible for freeing the packet buffers. If the packet is
134 * to be looped back to a non-loopback interface (because its destination is a
135 * local address), 'ifdev' is the loopback interface and 'netif' is set to the
136 * original interface's netif. In all other cases, 'ifdev' is the packet's
137 * source interface and 'netif' is NULL. The packet is passed to attached BPF
138 * devices only if 'to_bpf' is set. If 'hdrcmplt' is set, the source address
139 * of the data link header is already filled in; otherwise, the source address
140 * must be set to the device's source address, if applicable.
141 */
142 err_t
ifdev_output(struct ifdev * ifdev,struct pbuf * pbuf,struct netif * netif,int to_bpf,int hdrcmplt)143 ifdev_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
144 int to_bpf, int hdrcmplt)
145 {
146 struct bpfdev_link *bpfl;
147
148 /*
149 * If the interface and/or the link is down, discard the packet without
150 * reporting it to BPF or the actual interface module.
151 */
152 if (!ifdev_is_up(ifdev) || !ifdev_is_link_up(ifdev))
153 return ERR_IF; /* this should translate to ENETDOWN */
154
155 /*
156 * If the link-layer header is not yet complete, fill in the source
157 * address now. This exception applies to BPF-generated packets only.
158 * Complete the header before passing the packet back to BPF, which
159 * should see the completed version of the packet.
160 */
161 if (!hdrcmplt && ifdev->ifdev_ops->iop_hdrcmplt != NULL)
162 ifdev->ifdev_ops->iop_hdrcmplt(ifdev, pbuf);
163
164 /*
165 * As in ifdev_input(), we use the loopback interface for BPF and
166 * statistics even if the packet originates from a non-loopback device.
167 */
168 if (to_bpf) {
169 TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
170 bpfdev_output(bpfl, pbuf);
171 }
172
173 ifdev->ifdev_data.ifi_opackets++;
174 ifdev->ifdev_data.ifi_obytes += pbuf->tot_len;
175
176 /*
177 * TODO: this is rather imprecise, because it works only when we set
178 * the pbuf flag explicitly ourselves. That happens only for UDP/RAW
179 * packets, and not for (e.g.) ND6 multicast traffic. We have reasons
180 * to set the flags ourselves anyway, namely to support MSG_MCAST and
181 * MSG_BCAST on loopback interfaces, but they should be complemented by
182 * additional checks here on, say, the destination ethernet address.
183 */
184 if (pbuf->flags & PBUF_FLAG_LLMCAST)
185 ifdev->ifdev_data.ifi_omcasts++;
186
187 return ifdev->ifdev_ops->iop_output(ifdev, pbuf, netif);
188 }
189
190 /*
191 * Transmit an IPv4 packet on an interface, as requested by lwIP. Pass on the
192 * packet to the interface's link processor (e.g., etharp), unless the packet
193 * should be rejected or blackholed according to route information, or it is to
194 * be looped back into the interface. The latter may occur if the destination
195 * address belongs to the interface. In that case, we send the packet over a
196 * loopback interface instead. In addition, if this is a multicast packet that
197 * should be looped back, send a copy over a loopback interface as well.
198 * Loopback interfaces themselves are exempt from these special cases.
199 */
200 static err_t
ifdev_output_v4(struct netif * netif,struct pbuf * pbuf,const ip4_addr_t * ipaddr)201 ifdev_output_v4(struct netif * netif, struct pbuf * pbuf,
202 const ip4_addr_t * ipaddr)
203 {
204 struct ifdev *ifdev = netif_get_ifdev(netif);
205 err_t err;
206
207 assert(ifdev_loopback != NULL);
208
209 /* Check for reject/blackhole routes. */
210 if (!route_output_v4(ifdev, ipaddr, &err))
211 return err;
212
213 /* Handle looping of multicast packets on non-loopback interfaces. */
214 if (!ifdev_is_loopback(ifdev) && (pbuf->flags & PBUF_FLAG_MCASTLOOP))
215 (void)ifdev_output(ifdev_loopback, pbuf, netif,
216 FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);
217
218 /* Divert packets sent to the local interface address. */
219 if (!ifdev_is_loopback(ifdev) && ifdev->ifdev_v4set &&
220 ip4_addr_cmp(netif_ip4_addr(&ifdev->ifdev_netif), ipaddr))
221 ifdev = ifdev_loopback;
222 else
223 netif = NULL;
224
225 if (ifdev->ifdev_ops->iop_output_v4 != NULL)
226 return ifdev->ifdev_ops->iop_output_v4(ifdev_get_netif(ifdev),
227 pbuf, ipaddr);
228 else
229 return ifdev_output(ifdev, pbuf, netif, TRUE /*to_bpf*/,
230 TRUE /*hdrcmplt*/);
231 }
232
233 /*
234 * Transmit an IPv6 packet on an interface, as requested by lwIP. As for IPv4.
235 */
236 static err_t
ifdev_output_v6(struct netif * netif,struct pbuf * pbuf,const ip6_addr_t * ipaddr)237 ifdev_output_v6(struct netif * netif, struct pbuf * pbuf,
238 const ip6_addr_t * ipaddr)
239 {
240 struct ifdev *ifdev = netif_get_ifdev(netif);
241 err_t err;
242
243 assert(ifdev_loopback != NULL);
244
245 /* Check for reject/blackhole routes. */
246 if (!route_output_v6(ifdev, ipaddr, &err))
247 return err;
248
249 /* Handle looping of multicast packets on non-loopback interfaces. */
250 if (!ifdev_is_loopback(ifdev) && (pbuf->flags & PBUF_FLAG_MCASTLOOP))
251 (void)ifdev_output(ifdev_loopback, pbuf, netif,
252 FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);
253
254 /* Divert packets sent to the local interface address. */
255 if (!ifdev_is_loopback(ifdev) &&
256 (netif_get_ip6_addr_match(&ifdev->ifdev_netif, ipaddr) != -1 ||
257 ip6_addr_ismulticast_iflocal(ipaddr)))
258 ifdev = ifdev_loopback;
259 else
260 netif = NULL;
261
262 if (ifdev->ifdev_ops->iop_output_v6 != NULL)
263 return ifdev->ifdev_ops->iop_output_v6(ifdev_get_netif(ifdev),
264 pbuf, ipaddr);
265 else
266 return ifdev_output(ifdev, pbuf, netif, TRUE /*to_bpf*/,
267 TRUE /*hdrcmplt*/);
268 }
269
/*
 * Netif status callback.  lwIP calls this function whenever certain status
 * changes are made on the netif, regardless of whether lwIP or we initiated
 * those changes.  We use the callback to detect lwIP-initiated state changes
 * on local IPv6 addresses; shadow state is used to filter out the changes that
 * we initiated ourselves.
 *
 * We may switch to lwIP's extended netif callback mechanism one day.  For now,
 * netif state changes are rare and finding out whether anything changed takes
 * little effort, so there is no immediate need to do so.
 */
static void
ifdev_status_callback(struct netif * netif)
{

	ifaddr_v6_check(netif_get_ifdev(netif));
}
287
288 /*
289 * Initialize the netif structure for a new interface. Most of this is handled
290 * by the specific interface module.
291 */
292 static err_t
ifdev_init_netif(struct netif * netif)293 ifdev_init_netif(struct netif * netif)
294 {
295 struct ifdev *ifdev = netif_get_ifdev(netif);
296
297 assert(ifdev != NULL);
298
299 netif->output = ifdev_output_v4;
300 netif->output_ip6 = ifdev_output_v6;
301
302 netif->hwaddr_len = ifdev->ifdev_data.ifi_addrlen;
303 netif->mtu = ifdev->ifdev_data.ifi_mtu;
304
305 netif_set_status_callback(netif, ifdev_status_callback);
306
307 return ifdev->ifdev_ops->iop_init(ifdev, netif);
308 }
309
310 /*
311 * Retrieve an interface device by its interface index. Return a pointer to
312 * the interface device if found, or NULL otherwise. If the given interface
313 * index is zero, this function will always return NULL.
314 */
315 struct ifdev *
ifdev_get_by_index(uint32_t ifindex)316 ifdev_get_by_index(uint32_t ifindex)
317 {
318
319 if (ifindex >= __arraycount(ifdev_table))
320 return NULL;
321
322 return ifdev_table[ifindex];
323 }
324
325 /*
326 * Find an interface device by its name. Return a pointer to the interface
327 * device if found, or NULL otherwise.
328 */
329 struct ifdev *
ifdev_find_by_name(const char * name)330 ifdev_find_by_name(const char * name)
331 {
332 struct ifdev *ifdev;
333
334 TAILQ_FOREACH(ifdev, &ifdev_list, ifdev_next) {
335 if (!strcmp(ifdev->ifdev_name, name))
336 return ifdev;
337 }
338
339 return NULL;
340 }
341
342 /*
343 * Given either NULL or a previously returned interface device object pointer,
344 * return the first or next interface device object pointer, or NULL if there
345 * are no more.
346 */
347 struct ifdev *
ifdev_enum(struct ifdev * last)348 ifdev_enum(struct ifdev * last)
349 {
350
351 if (last == NULL)
352 return TAILQ_FIRST(&ifdev_list);
353 else
354 return TAILQ_NEXT(last, ifdev_next);
355 }
356
357 /*
358 * Attach a BPF device as listener to this interface.
359 */
360 void
ifdev_attach_bpf(struct ifdev * ifdev,struct bpfdev_link * bpfl)361 ifdev_attach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
362 {
363
364 TAILQ_INSERT_TAIL(&ifdev->ifdev_bpf, bpfl, bpfl_next);
365 }
366
367 /*
368 * Detach a previously attached BPF device from this interface.
369 */
370 void
ifdev_detach_bpf(struct ifdev * ifdev,struct bpfdev_link * bpfl)371 ifdev_detach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
372 {
373
374 TAILQ_REMOVE(&ifdev->ifdev_bpf, bpfl, bpfl_next);
375 }
376
377 /*
378 * Register the calling party as interested in putting the interface in
379 * promiscuous mode. There may be multiple such parties, each of which can
380 * call this function once, after which they must call ifdev_clear_promisc()
381 * later. If possible, the interface is put in promiscuous mode if there is at
382 * least one interested party. Return TRUE on success, or FALSE on failure.
383 */
384 int
ifdev_set_promisc(struct ifdev * ifdev)385 ifdev_set_promisc(struct ifdev * ifdev)
386 {
387
388 /*
389 * A bit silly, but we want to retain the ability to fail this call for
390 * other reasons in the future, with BPF handling that case properly.
391 */
392 if (ifdev->ifdev_promisc == UINT_MAX)
393 return FALSE;
394
395 if (ifdev->ifdev_promisc++ == 0) {
396 ifdev_update_ifflags(ifdev,
397 ifdev->ifdev_ifflags | IFF_PROMISC);
398
399 if (ifdev->ifdev_ops->iop_set_promisc != NULL)
400 ifdev->ifdev_ops->iop_set_promisc(ifdev, TRUE);
401 }
402
403 return TRUE;
404 }
405
406 /*
407 * Deregister a previously registered party interested in putting the interface
408 * in promiscuous mode. Once the last party deregisters, the device is pulled
409 * out of promiscuous mode.
410 */
411 void
ifdev_clear_promisc(struct ifdev * ifdev)412 ifdev_clear_promisc(struct ifdev * ifdev)
413 {
414
415 assert(ifdev->ifdev_promisc > 0);
416
417 if (--ifdev->ifdev_promisc == 0) {
418 if (ifdev->ifdev_ops->iop_set_promisc != NULL)
419 ifdev->ifdev_ops->iop_set_promisc(ifdev, FALSE);
420
421 ifdev_update_ifflags(ifdev,
422 ifdev->ifdev_ifflags & ~IFF_PROMISC);
423 }
424 }
425
426 /*
427 * Set NetBSD-style interface flags (IFF_) for an interface.
428 */
429 int
ifdev_set_ifflags(struct ifdev * ifdev,unsigned int ifflags)430 ifdev_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
431 {
432 int r;
433
434 /* Check and update only the subset of flags that may be changed. */
435 ifflags &= ~(IFF_CANTCHANGE | IFF_LOOPBACK);
436
437 /*
438 * Important: the callback function may call ifdev_update_ifflags()
439 * itself immediately, to update read-only flags such as IFF_RUNNING
440 * based on read-write flags such as IFF_UP. So as to make that work..
441 *
442 * 1) this function MUST succeed if the callback function succeeds;
443 * 2) this function MUST NOT make assumptions about the ifdev_ifflags
444 * field across the callback invocation.
445 *
446 * Conversely, the callback function should be aware that the flags
447 * field will still be updated with the flags. In this model, it is
448 * not possible for the callback function to silently change any of the
449 * given flags. If that is ever necessary, API changes are needed.
450 */
451 if ((r = ifdev->ifdev_ops->iop_set_ifflags(ifdev, ifflags)) != OK)
452 return r;
453
454 /*
455 * On success, merge the updated subset with the subset that may not be
456 * changed.
457 */
458 ifflags |= ifdev->ifdev_ifflags & (IFF_CANTCHANGE | IFF_LOOPBACK);
459
460 ifdev_update_ifflags(ifdev, ifflags);
461
462 return OK;
463 }
464
465 /*
466 * Update NetBSD-style interface flags (IFF_) for an interface, and perform any
467 * required operations as a result of certain flags changing. This function
468 * bypasses all input checks and directly changes the flags field to exactly
469 * the given set of flags.
470 */
471 void
ifdev_update_ifflags(struct ifdev * ifdev,unsigned int ifflags)472 ifdev_update_ifflags(struct ifdev * ifdev, unsigned int ifflags)
473 {
474 struct netif *netif;
475
476 /*
477 * First update the flags field itself. The new value should be
478 * visible in the routing messages generated below, for example.
479 */
480 ifdev->ifdev_ifflags = ifflags;
481
482 /*
483 * Then perform operations as a result of the flags field changing.
484 * For now, this is relevant for IFF_UP only.
485 */
486 netif = ifdev_get_netif(ifdev);
487
488 if ((ifflags & IFF_UP) && !netif_is_up(netif)) {
489 netif_set_up(netif);
490
491 rtsock_msg_ifinfo(ifdev);
492
493 /*
494 * Check if all conditions are now met for link-local IPv6
495 * address assignment.
496 */
497 ifaddr_v6_set_linklocal(ifdev);
498
499 /* See if we should also reset address states now. */
500 if (netif_is_link_up(netif))
501 ifaddr_v6_set_up(ifdev);
502 } else if (!(ifflags & IFF_UP) && netif_is_up(netif)) {
503 netif_set_down(netif);
504
505 rtsock_msg_ifinfo(ifdev);
506 }
507 }
508
509 /*
510 * Retrieve NetBSD-style interface capabilities (IFCAP_) for an interface: both
511 * the supported and the enabled capabilities.
512 */
513 void
ifdev_get_ifcap(struct ifdev * ifdev,uint64_t * ifcap,uint64_t * ifena)514 ifdev_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap, uint64_t * ifena)
515 {
516
517 *ifcap = 0;
518 *ifena = 0;
519
520 if (ifdev->ifdev_ops->iop_get_ifcap != NULL)
521 ifdev->ifdev_ops->iop_get_ifcap(ifdev, ifcap, ifena);
522 }
523
524 /*
525 * Set enabled NetBSD-style interface capabilities (IFCAP_) for an interface.
526 */
527 int
ifdev_set_ifcap(struct ifdev * ifdev,uint64_t ifena)528 ifdev_set_ifcap(struct ifdev * ifdev, uint64_t ifena)
529 {
530
531 if (ifdev->ifdev_ops->iop_set_ifcap != NULL)
532 return ifdev->ifdev_ops->iop_set_ifcap(ifdev, ifena);
533 else
534 return EINVAL;
535 }
536
537 /*
538 * Retrieve NetBSD-style media type (IFM_) for an interface. Return OK on
539 * success, with the current media type selection stored in 'ifcurrent', the
540 * driver-reported active media type in 'ifactive', and the link status in
541 * 'ifstatus'. Return a negative error code on failure.
542 */
543 int
ifdev_get_ifmedia(struct ifdev * ifdev,int * ifcurrent,int * ifactive)544 ifdev_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive)
545 {
546
547 if (ifdev->ifdev_ops->iop_get_ifmedia == NULL)
548 return ENOTTY;
549
550 ifdev->ifdev_ops->iop_get_ifmedia(ifdev, ifcurrent, ifactive);
551
552 return OK;
553 }
554
555 /*
556 * Set NetBSD-style media type (IFM_) for an interface. Return OK on success,
557 * or a negative error code on failure.
558 */
559 int
ifdev_set_ifmedia(struct ifdev * ifdev,int ifmedia)560 ifdev_set_ifmedia(struct ifdev * ifdev, int ifmedia)
561 {
562
563 if (ifdev->ifdev_ops->iop_set_ifmedia == NULL)
564 return ENOTTY;
565
566 if (ifmedia < 0)
567 return EINVAL;
568
569 return ifdev->ifdev_ops->iop_set_ifmedia(ifdev, ifmedia);
570 }
571
572 /*
573 * Set the Maximum Transmission Unit for an interface. Return OK on success,
574 * or a negative error code on failure.
575 */
576 int
ifdev_set_mtu(struct ifdev * ifdev,unsigned int mtu)577 ifdev_set_mtu(struct ifdev * ifdev, unsigned int mtu)
578 {
579
580 if (ifdev->ifdev_ops->iop_set_mtu == NULL)
581 return ENOTTY;
582
583 if (mtu < IFDEV_MIN_MTU || mtu > UINT16_MAX ||
584 !ifdev->ifdev_ops->iop_set_mtu(ifdev, mtu))
585 return EINVAL;
586
587 ifdev->ifdev_data.ifi_mtu = mtu;
588 ifdev->ifdev_netif.mtu = mtu;
589
590 return OK;
591 }
592
593 /*
594 * Set IPv6 Neighbor Discovery related flags.
595 */
596 int
ifdev_set_nd6flags(struct ifdev * ifdev,uint32_t nd6flags)597 ifdev_set_nd6flags(struct ifdev * ifdev, uint32_t nd6flags)
598 {
599
600 /* For now, refuse setting any flags that are not even known. */
601 if ((nd6flags & ~(ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV |
602 ND6_IFF_IFDISABLED | ND6_IFF_OVERRIDE_RTADV |
603 ND6_IFF_AUTO_LINKLOCAL)) != 0)
604 return EINVAL;
605
606 /*
607 * Unfortunately, the mismatch between NetBSD and lwIP requires us to
608 * support but butcher ND6 flags. The current status is as follows:
609 *
610 * - ND6_IFF_PERFORMNUD: set by default as lwIP always implements NUD;
611 * changes are disregarded but possible, for dhcpcd(8).
612 * - ND6_IFF_ACCEPT_RTADV: disregarded but settable, for dhcpcd(8); in
613 * our case, lwIP always processes router advertisements but never
614 * autoconfigures addresses, so this flag has no meaning for us.
615 * - ND6_IFF_IFDISABLED: not supported; can only be cleared; we could
616 * probably do detection of link-local address collision and set this
617 * flag (and disable the interface if set) when that happens; TODO.
618 * - ND6_IFF_OVERRIDE_RTADV: same as _ACCEPT_ above.
619 * - ND6_IFF_AUTO_LINKLOCAL: supported, but not initialized based on
620 * the corresponding sysctl(7) flag for reasons mentioned in ifaddr.
621 */
622 if (nd6flags & ND6_IFF_IFDISABLED)
623 return EINVAL;
624
625 ifdev->ifdev_nd6flags = nd6flags;
626
627 return OK;
628 }
629
/*
 * Report an update to the interface's active hardware address that is *not*
 * the result of a user action.  If the 'is_factory' flag is set, the address
 * is the factory (driver-given) address.  This function is for use by
 * interface modules, to update the internal state to their current external
 * state.  The update itself is delegated to ifaddr_dl_update().
 */
void
ifdev_update_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr,
	int is_factory)
{

	/*
	 * This function returns void, so the call must be a plain statement:
	 * ISO C does not allow a return statement with an expression here.
	 */
	ifaddr_dl_update(ifdev, hwaddr, is_factory);
}
644
645 /*
646 * Insert a new interface device into the list of interface devices, at a
647 * location determined by policy.
648 */
649 static void
ifdev_insert(struct ifdev * ifdev)650 ifdev_insert(struct ifdev * ifdev)
651 {
652 struct ifdev *ifdev2;
653 const char *p;
654 unsigned int unit, unit2;
655 size_t namelen;
656 int found;
657
658 /*
659 * While NetBSD can set up all interfaces in the order it wants them to
660 * appear in, we do not have such luxury: network device drivers come
661 * up and report to us in no particular predefined order, and we have
662 * no way to know how many and which will appear. The result is that
663 * we always have to create the loopback device first, something that
664 * is explicitly said to be bad in NetBSD. Instead, we create an
665 * illusion of a reasonable order by performing insertion sort on the
666 * interface list, using (for now) these rules, ordered by priority:
667 *
668 * 1. same-named devices are sorted by their unit number;
669 * 2. loopback interfaces are inserted after all other interfaces;
670 * 3. new devices are added at the end of their type category.
671 *
672 * In the future, other forms of real-vs-virtual sorting may be added.
673 */
674
675 /* First check for same-named devices (#1). */
676 for (p = ifdev->ifdev_name; *p != '\0' && (*p < '0' || *p > '9'); p++);
677
678 namelen = (size_t)(p - ifdev->ifdev_name);
679
680 for (unit = 0; *p >= '0' && *p <= '9'; p++)
681 unit = unit * 10 + *p - '0';
682
683 found = FALSE;
684 TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
685 if (!strncmp(ifdev->ifdev_name, ifdev2->ifdev_name, namelen) &&
686 *(p = &ifdev2->ifdev_name[namelen]) >= '0' && *p <= '9') {
687 for (unit2 = 0; *p >= '0' && *p <= '9'; p++)
688 unit2 = unit2 * 10 + *p - '0';
689
690 assert(unit != unit2);
691
692 found = TRUE;
693 if (unit2 > unit)
694 break;
695 } else if (found)
696 break;
697 }
698
699 if (found) {
700 if (ifdev2 != NULL)
701 TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
702 else
703 TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
704
705 return;
706 }
707
708 /*
709 * No same-named device found. Is this a loopback interface? If not,
710 * insert before the first loopback device, if any.
711 */
712 if (!ifdev_is_loopback(ifdev)) {
713 TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
714 if (ifdev_is_loopback(ifdev2)) {
715 TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
716
717 return;
718 }
719 }
720 }
721
722 /*
723 * The given device is not a loopback device, or there was no loopback
724 * device in the list, possibly because it was empty. Add to the tail.
725 */
726 TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
727 }
728
729 /*
730 * Add and initialize an interface device.
731 */
732 void
ifdev_add(struct ifdev * ifdev,const char * name,unsigned int ifflags,unsigned int iftype,size_t hdrlen,size_t addrlen,unsigned int dlt,unsigned int mtu,uint32_t nd6flags,const struct ifdev_ops * iop)733 ifdev_add(struct ifdev * ifdev, const char * name, unsigned int ifflags,
734 unsigned int iftype, size_t hdrlen, size_t addrlen, unsigned int dlt,
735 unsigned int mtu, uint32_t nd6flags, const struct ifdev_ops * iop)
736 {
737 unsigned int ifindex;
738 ip4_addr_t ip4addr_any, ip4addr_none;
739
740 /*
741 * Since the call to netif_add() may end up invoking some of our
742 * callbacks (the add-multicast-address ones in particular), make sure
743 * that everything else is set up first. We cannot set up the index
744 * mapping until netif_add() returns, but this is currently no problem.
745 */
746 strlcpy(ifdev->ifdev_name, name, sizeof(ifdev->ifdev_name));
747 ifdev->ifdev_ifflags = 0; /* will be updated below */
748 ifdev->ifdev_dlt = dlt;
749 ifdev->ifdev_nd6flags = nd6flags;
750 ifdev->ifdev_ops = iop;
751
752 memset(&ifdev->ifdev_data, 0, sizeof(ifdev->ifdev_data));
753
754 assert(addrlen <= NETIF_MAX_HWADDR_LEN);
755 assert(mtu >= IFDEV_MIN_MTU && mtu <= UINT16_MAX);
756
757 ifdev->ifdev_data.ifi_type = iftype;
758 ifdev->ifdev_data.ifi_hdrlen = hdrlen;
759 ifdev->ifdev_data.ifi_addrlen = addrlen;
760 ifdev->ifdev_data.ifi_link_state = LINK_STATE_UNKNOWN;
761 ifdev->ifdev_data.ifi_mtu = mtu;
762
763 TAILQ_INIT(&ifdev->ifdev_bpf);
764
765 ifaddr_init(ifdev);
766
767 /*
768 * We have to assign an IPv4 address at netif addition time, but we may
769 * not have one yet, so pass in an "any" address for now. Hopefully
770 * lwIP will not mistake this for a real IPv4 address if we happen to
771 * enable the interface with only an IPv6 address later on.
772 */
773 ip4_addr_set_any(&ip4addr_any);
774 ip4_addr_set_u32(&ip4addr_none, PP_HTONL(INADDR_NONE));
775
776 /*
777 * Insert the new interface device into a sensible place in the current
778 * list of interfaces.
779 */
780 ifdev_insert(ifdev);
781
782 /*
783 * netif_add() can fail only as a result of the initialization callback
784 * failing, which is something that should never happen in our case.
785 */
786 if (netif_add(&ifdev->ifdev_netif, &ip4addr_any, &ip4addr_none,
787 &ip4addr_any, ifdev, ifdev_init_netif, iop->iop_input) == NULL)
788 panic("unable to add netif");
789
790 /*
791 * Set up the index mapping. Since interface index zero never
792 * generated, table slot zero is always NULL. We could shift all
793 * elements by one to save four bytes, but there's no real point.
794 */
795 ifindex = netif_get_index(&ifdev->ifdev_netif);
796
797 if (ifindex == 0 || ifindex >= __arraycount(ifdev_table))
798 panic("invalid lwIP-generated interface index %u", ifindex);
799
800 ifdev_table[ifindex] = ifdev;
801
802 /*
803 * Set the initial interface flags. Use the regular procedure for this
804 * just in case the interface module is crazy enough to set the
805 * interface up right away (which is never a good idea but still).
806 */
807 ifdev_update_ifflags(ifdev, ifflags);
808
809 /*
810 * If this is the first loopback interface to be registered, save it as
811 * the loopback interface that we will use to loop back self-destined
812 * packets on other interfaces. Do this after setting the interface
813 * flags, since those are what we use to perform this loopback check.
814 */
815 if (ifdev_loopback == NULL && ifdev_is_loopback(ifdev))
816 ifdev_loopback = ifdev;
817
818 /* Finally, announce the new interface. */
819 rtsock_msg_ifannounce(ifdev, TRUE /*arrival*/);
820 }
821
822 /*
823 * Remove an interface device. Return OK on success, or a negative error code
824 * on failure. Only loopback interfaces may be refused for removal.
825 */
826 int
ifdev_remove(struct ifdev * ifdev)827 ifdev_remove(struct ifdev * ifdev)
828 {
829 struct bpfdev_link *bpfl;
830
831 /*
832 * If this is the loopback interface used to loop back packets for
833 * other interfaces (typically lo0), we cannot afford to get rid of it.
834 */
835 if (ifdev == ifdev_loopback)
836 return EPERM;
837
838 /*
839 * Take down the interface for the purpose of sending a routing
840 * message. NetBSD sends a RTM_IFINFO even if the interface was down
841 * already, and so we do not check whether IFF_UP was set at all here.
842 */
843 ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags & ~IFF_UP);
844
845 /*
846 * Report all associated addresses as deleted. It is not necessary to
847 * actually delete the addresses, nor is that even possible in all
848 * cases. In particular, the active hardware address cannot be
849 * deleted. Since the active hardware address is used in all address
850 * change announcements, delete it at the very end.
851 */
852 ifaddr_v4_clear(ifdev);
853 ifaddr_v6_clear(ifdev);
854 ifaddr_dl_clear(ifdev);
855
856 /*
857 * Delete all remaining routes associated with the interface. These
858 * are reported as well. We do this after clearing the addresses so as
859 * not to confuse the route deletion part of clearing addresses.
860 */
861 route_clear(ifdev);
862
863 /* Finally, announce the interface itself as gone. */
864 rtsock_msg_ifannounce(ifdev, FALSE /*arrival*/);
865
866 /*
867 * Free up all per-socket multicast membership structures associated to
868 * the interface. There is no need to leave the multicast groups.
869 */
870 mcast_clear(ifdev);
871
872 /*
873 * Also tell attached BPF devices that the interface is now gone. Do
874 * not bother to reset the list.
875 */
876 TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
877 bpfdev_detach(bpfl);
878
879 /* Then perform the actual interface removal. */
880 netif_remove(&ifdev->ifdev_netif);
881
882 TAILQ_REMOVE(&ifdev_list, ifdev, ifdev_next);
883
884 assert(ifdev_table[ifdev_get_index(ifdev)] == ifdev);
885 ifdev_table[ifdev_get_index(ifdev)] = NULL;
886
887 return OK;
888 }
889
890 /*
891 * Return the loopback interface.
892 */
893 struct ifdev *
ifdev_get_loopback(void)894 ifdev_get_loopback(void)
895 {
896
897 assert(ifdev_loopback != NULL);
898
899 return ifdev_loopback;
900 }
901
902 /*
903 * Report an update of the link state of the given interface, to 'unknown',
904 * 'up', or 'down', using NetBSD's LINK_STATE_ values. The link state is
905 * changed in the associated lwIP netif, and is reported on monitoring routing
906 * sockets. This function is for use by interface modules, to update the
907 * internal state to their current external state.
908 */
909 void
ifdev_update_link(struct ifdev * ifdev,int iflink)910 ifdev_update_link(struct ifdev * ifdev, int iflink)
911 {
912 struct netif *netif;
913 int was_up, is_up;
914
915 ifdev->ifdev_data.ifi_link_state = iflink;
916
917 /*
918 * For netif, 'up' and 'unknown' are the same link state: we simply try
919 * to send and receive packets in both cases. Thus, transitions from
920 * and to the 'down' link state are the ones that matter.
921 */
922 netif = ifdev_get_netif(ifdev);
923
924 was_up = netif_is_link_up(netif);
925 is_up = (iflink != LINK_STATE_DOWN);
926
927 if (was_up != is_up) {
928 if (is_up) {
929 netif_set_link_up(netif);
930
931 /* See if we should also reset address states now. */
932 if (ifdev_is_up(ifdev))
933 ifaddr_v6_set_up(ifdev);
934 } else
935 netif_set_link_down(netif);
936
937 rtsock_msg_ifinfo(ifdev);
938 }
939 }
940
941 /*
942 * Register a virtual interface type, using a name prefix and a function that
943 * is called when creation of a virtual interface of that type is requested.
944 */
945 void
ifdev_register(const char * name,int (* create)(const char *))946 ifdev_register(const char * name, int (* create)(const char *))
947 {
948
949 if (ifdev_vtypes == __arraycount(ifdev_vtype))
950 panic("too few slots for all virtual interface types");
951
952 ifdev_vtype[ifdev_vtypes].ifvt_name = name;
953 ifdev_vtype[ifdev_vtypes].ifvt_namelen = strlen(name);
954 ifdev_vtype[ifdev_vtypes].ifvt_create = create;
955 ifdev_vtypes++;
956 }
957
958 /*
959 * Verify that the given name is a valid interface name that can be used for
960 * creating a new interface. In particular, check that the given name is a
961 * valid interface name, consisting of an alphabetic string (the interface type
962 * or driver name) followed by a number string (the unit or instance number).
963 * Furthermore, make sure that the name does not already exist. Finally, see
964 * if the name prefix is reserved for a virtual interface type. If the given
965 * 'vtype_slot' pointer is not NULL, the prefix must be, and the virtual type
966 * slot number is returned in 'vtype_slot' on success. If 'vtype_slot' is
967 * NULL, the name must not have a virtual interface prefix, and an error is
968 * returned if it is. Since vtype slot numbers are meaningless outside of this
969 * module, external callers must always pass in NULL. This function returns OK
970 * on succes or a negative error code on error.
971 */
972 int
ifdev_check_name(const char * name,unsigned int * vtype_slot)973 ifdev_check_name(const char * name, unsigned int * vtype_slot)
974 {
975 const char *p;
976 size_t namelen;
977 unsigned int slot;
978
979 /*
980 * First see if the name is valid at all. TODO: decide if we want to
981 * allow uppercase letters, dashes, and/or underscores.
982 */
983 for (p = name; *p >= 'a' && *p <= 'z'; p++);
984
985 if (p == name || *p == '\0')
986 return EINVAL;
987
988 namelen = (size_t)(p - name);
989
990 for (; *p >= '0' && *p <= '9'; p++);
991
992 if (*p != '\0')
993 return EINVAL;
994
995 /* Then make sure that it does not already exist. */
996 if (ifdev_find_by_name(name) != NULL)
997 return EEXIST;
998
999 /* See if there is a matching virtual interface type for the name. */
1000 for (slot = 0; slot < ifdev_vtypes; slot++) {
1001 if (ifdev_vtype[slot].ifvt_namelen == namelen &&
1002 !strncmp(ifdev_vtype[slot].ifvt_name, name, namelen))
1003 break;
1004 }
1005
1006 /* The interpretation of the result depends on 'vtype_slot'. */
1007 if (vtype_slot != NULL) {
1008 if (slot == ifdev_vtypes)
1009 return EINVAL;
1010
1011 *vtype_slot = slot;
1012 } else if (slot != ifdev_vtypes)
1013 return EINVAL;
1014
1015 return OK;
1016 }
1017
1018 /*
1019 * Create a new virtual interface. The virtual interface type is based on the
1020 * given name (without unit number). Return OK if the virtual interface has
1021 * been successfully created, or a negative error code otherwise. This
1022 * function is used both for the SIOCIFCREATE ioctl and internally.
1023 */
1024 int
ifdev_create(const char * name)1025 ifdev_create(const char * name)
1026 {
1027 unsigned int slot;
1028 int r;
1029
1030 /* Verify that the given name is an acceptable interface name. */
1031 if ((r = ifdev_check_name(name, &slot)) != OK)
1032 return EINVAL;
1033
1034 /* Let the virtual interface implementation handle the rest. */
1035 return ifdev_vtype[slot].ifvt_create(name);
1036 }
1037
1038 /*
1039 * Destroy an interface, if possible.
1040 */
1041 int
ifdev_destroy(struct ifdev * ifdev)1042 ifdev_destroy(struct ifdev * ifdev)
1043 {
1044
1045 if (ifdev->ifdev_ops->iop_destroy == NULL)
1046 return EINVAL;
1047
1048 return ifdev->ifdev_ops->iop_destroy(ifdev);
1049 }
1050
1051 /*
1052 * Enumerate the names of currently supported virtual interface types. Return
1053 * a pointer to the null-terminated name prefix of the Nth virtual interface
1054 * type if the (zero-based) N value is within range, or NULL otherwise.
1055 */
1056 const char *
ifdev_enum_vtypes(unsigned int num)1057 ifdev_enum_vtypes(unsigned int num)
1058 {
1059
1060 if (num < ifdev_vtypes)
1061 return ifdev_vtype[num].ifvt_name;
1062 else
1063 return NULL;
1064 }
1065