1 /* $NetBSD: if_tap.c,v 1.84 2016/06/10 13:27:16 ozaki-r Exp $ */
2
3 /*
4 * Copyright (c) 2003, 2004, 2008, 2009 The NetBSD Foundation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * tap(4) is a virtual Ethernet interface. It appears as a real Ethernet
31 * device to the system, but can also be accessed by userland through a
32 * character device interface, which allows reading and injecting frames.
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: if_tap.c,v 1.84 2016/06/10 13:27:16 ozaki-r Exp $");
37
38 #if defined(_KERNEL_OPT)
39
40 #include "opt_modular.h"
41 #include "opt_compat_netbsd.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/conf.h>
49 #include <sys/cprng.h>
50 #include <sys/device.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/poll.h>
54 #include <sys/proc.h>
55 #include <sys/select.h>
56 #include <sys/sockio.h>
57 #if defined(COMPAT_40) || defined(MODULAR)
58 #include <sys/sysctl.h>
59 #endif
60 #include <sys/kauth.h>
61 #include <sys/mutex.h>
62 #include <sys/intr.h>
63 #include <sys/stat.h>
64
65 #include <net/if.h>
66 #include <net/if_dl.h>
67 #include <net/if_ether.h>
68 #include <net/if_media.h>
69 #include <net/if_tap.h>
70 #include <net/bpf.h>
71
72 #include <compat/sys/sockio.h>
73
74 #include "ioconf.h"
75
76 #if defined(COMPAT_40) || defined(MODULAR)
77 /*
78 * sysctl node management
79 *
80 * It's not really possible to use a SYSCTL_SETUP block with
81 * current module implementation, so it is easier to just define
82 * our own function.
83 *
84 * The handler function is a "helper" in Andrew Brown's sysctl
85 * framework terminology. It is used as a gateway for sysctl
86 * requests over the nodes.
87 *
88 * tap_log allows the module to log creations of nodes and
89 * destroy them all at once using sysctl_teardown.
90 */
91 static int tap_node;
92 static int tap_sysctl_handler(SYSCTLFN_PROTO);
93 SYSCTL_SETUP_PROTO(sysctl_tap_setup);
94 #endif
95
96 /*
97 * Since we're an Ethernet device, we need the 2 following
98 * components: a struct ethercom and a struct ifmedia
99 * since we don't attach a PHY to ourselves.
100 * We could emulate one, but there's no real point.
101 */
102
103 struct tap_softc {
104 device_t sc_dev;
105 struct ifmedia sc_im;
106 struct ethercom sc_ec;
107 int sc_flags;
108 #define TAP_INUSE 0x00000001 /* tap device can only be opened once */
109 #define TAP_ASYNCIO 0x00000002 /* user is using async I/O (SIGIO) on the device */
110 #define TAP_NBIO 0x00000004 /* user wants calls to avoid blocking */
111 #define TAP_GOING 0x00000008 /* interface is being destroyed */
112 struct selinfo sc_rsel;
113 pid_t sc_pgid; /* For async. IO */
114 kmutex_t sc_rdlock;
115 kmutex_t sc_kqlock;
116 void *sc_sih;
117 struct timespec sc_atime;
118 struct timespec sc_mtime;
119 struct timespec sc_btime;
120 };
121
122 /* autoconf(9) glue */
123
124 static int tap_match(device_t, cfdata_t, void *);
125 static void tap_attach(device_t, device_t, void *);
126 static int tap_detach(device_t, int);
127
128 CFATTACH_DECL_NEW(tap, sizeof(struct tap_softc),
129 tap_match, tap_attach, tap_detach, NULL);
130 extern struct cfdriver tap_cd;
131
132 /* Real device access routines */
133 static int tap_dev_close(struct tap_softc *);
134 static int tap_dev_read(int, struct uio *, int);
135 static int tap_dev_write(int, struct uio *, int);
136 static int tap_dev_ioctl(int, u_long, void *, struct lwp *);
137 static int tap_dev_poll(int, int, struct lwp *);
138 static int tap_dev_kqfilter(int, struct knote *);
139
140 /* Fileops access routines */
141 static int tap_fops_close(file_t *);
142 static int tap_fops_read(file_t *, off_t *, struct uio *,
143 kauth_cred_t, int);
144 static int tap_fops_write(file_t *, off_t *, struct uio *,
145 kauth_cred_t, int);
146 static int tap_fops_ioctl(file_t *, u_long, void *);
147 static int tap_fops_poll(file_t *, int);
148 static int tap_fops_stat(file_t *, struct stat *);
149 static int tap_fops_kqfilter(file_t *, struct knote *);
150
151 static const struct fileops tap_fileops = {
152 .fo_read = tap_fops_read,
153 .fo_write = tap_fops_write,
154 .fo_ioctl = tap_fops_ioctl,
155 .fo_fcntl = fnullop_fcntl,
156 .fo_poll = tap_fops_poll,
157 .fo_stat = tap_fops_stat,
158 .fo_close = tap_fops_close,
159 .fo_kqfilter = tap_fops_kqfilter,
160 .fo_restart = fnullop_restart,
161 };
162
163 /* Helper for cloning open() */
164 static int tap_dev_cloner(struct lwp *);
165
166 /* Character device routines */
167 static int tap_cdev_open(dev_t, int, int, struct lwp *);
168 static int tap_cdev_close(dev_t, int, int, struct lwp *);
169 static int tap_cdev_read(dev_t, struct uio *, int);
170 static int tap_cdev_write(dev_t, struct uio *, int);
171 static int tap_cdev_ioctl(dev_t, u_long, void *, int, struct lwp *);
172 static int tap_cdev_poll(dev_t, int, struct lwp *);
173 static int tap_cdev_kqfilter(dev_t, struct knote *);
174
175 const struct cdevsw tap_cdevsw = {
176 .d_open = tap_cdev_open,
177 .d_close = tap_cdev_close,
178 .d_read = tap_cdev_read,
179 .d_write = tap_cdev_write,
180 .d_ioctl = tap_cdev_ioctl,
181 .d_stop = nostop,
182 .d_tty = notty,
183 .d_poll = tap_cdev_poll,
184 .d_mmap = nommap,
185 .d_kqfilter = tap_cdev_kqfilter,
186 .d_discard = nodiscard,
187 .d_flag = D_OTHER
188 };
189
190 #define TAP_CLONER 0xfffff /* Maximal minor value */
191
192 /* kqueue-related routines */
193 static void tap_kqdetach(struct knote *);
194 static int tap_kqread(struct knote *, long);
195
196 /*
197 * Those are needed by the if_media interface.
198 */
199
200 static int tap_mediachange(struct ifnet *);
201 static void tap_mediastatus(struct ifnet *, struct ifmediareq *);
202
203 /*
204 * Those are needed by the ifnet interface, and would typically be
205 * there for any network interface driver.
206 * Some other routines are optional: watchdog and drain.
207 */
208
209 static void tap_start(struct ifnet *);
210 static void tap_stop(struct ifnet *, int);
211 static int tap_init(struct ifnet *);
212 static int tap_ioctl(struct ifnet *, u_long, void *);
213
214 /* Internal functions */
215 #if defined(COMPAT_40) || defined(MODULAR)
216 static int tap_lifaddr(struct ifnet *, u_long, struct ifaliasreq *);
217 #endif
218 static void tap_softintr(void *);
219
220 /*
221 * tap is a clonable interface, although it is highly unrealistic for
222 * an Ethernet device.
223 *
224 * Here are the bits needed for a clonable interface.
225 */
226 static int tap_clone_create(struct if_clone *, int);
227 static int tap_clone_destroy(struct ifnet *);
228
229 struct if_clone tap_cloners = IF_CLONE_INITIALIZER("tap",
230 tap_clone_create,
231 tap_clone_destroy);
232
/* Helper functions shared by the two cloning code paths */
234 static struct tap_softc * tap_clone_creator(int);
235 int tap_clone_destroyer(device_t);
236
237 void
tapattach(int n)238 tapattach(int n)
239 {
240 int error;
241
242 error = config_cfattach_attach(tap_cd.cd_name, &tap_ca);
243 if (error) {
244 aprint_error("%s: unable to register cfattach\n",
245 tap_cd.cd_name);
246 (void)config_cfdriver_detach(&tap_cd);
247 return;
248 }
249
250 if_clone_attach(&tap_cloners);
251 }
252
253 /* Pretty much useless for a pseudo-device */
254 static int
tap_match(device_t parent,cfdata_t cfdata,void * arg)255 tap_match(device_t parent, cfdata_t cfdata, void *arg)
256 {
257
258 return (1);
259 }
260
261 void
tap_attach(device_t parent,device_t self,void * aux)262 tap_attach(device_t parent, device_t self, void *aux)
263 {
264 struct tap_softc *sc = device_private(self);
265 struct ifnet *ifp;
266 #if defined(COMPAT_40) || defined(MODULAR)
267 const struct sysctlnode *node;
268 int error;
269 #endif
270 uint8_t enaddr[ETHER_ADDR_LEN] =
271 { 0xf2, 0x0b, 0xa4, 0xff, 0xff, 0xff };
272 char enaddrstr[3 * ETHER_ADDR_LEN];
273
274 sc->sc_dev = self;
275 sc->sc_sih = NULL;
276 getnanotime(&sc->sc_btime);
277 sc->sc_atime = sc->sc_mtime = sc->sc_btime;
278 sc->sc_flags = 0;
279 selinit(&sc->sc_rsel);
280
281 /*
282 * Initialize the two locks for the device.
283 *
284 * We need a lock here because even though the tap device can be
285 * opened only once, the file descriptor might be passed to another
286 * process, say a fork(2)ed child.
287 *
288 * The Giant saves us from most of the hassle, but since the read
289 * operation can sleep, we don't want two processes to wake up at
290 * the same moment and both try and dequeue a single packet.
291 *
292 * The queue for event listeners (used by kqueue(9), see below) has
293 * to be protected too, so use a spin lock.
294 */
295 mutex_init(&sc->sc_rdlock, MUTEX_DEFAULT, IPL_NONE);
296 mutex_init(&sc->sc_kqlock, MUTEX_DEFAULT, IPL_VM);
297
298 if (!pmf_device_register(self, NULL, NULL))
299 aprint_error_dev(self, "couldn't establish power handler\n");
300
301 /*
302 * In order to obtain unique initial Ethernet address on a host,
303 * do some randomisation. It's not meant for anything but avoiding
304 * hard-coding an address.
305 */
306 cprng_fast(&enaddr[3], 3);
307
308 aprint_verbose_dev(self, "Ethernet address %s\n",
309 ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr));
310
311 /*
312 * Why 1000baseT? Why not? You can add more.
313 *
314 * Note that there are 3 steps: init, one or several additions to
315 * list of supported media, and in the end, the selection of one
316 * of them.
317 */
318 ifmedia_init(&sc->sc_im, 0, tap_mediachange, tap_mediastatus);
319 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T, 0, NULL);
320 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
321 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX, 0, NULL);
322 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL);
323 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T, 0, NULL);
324 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL);
325 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL);
326 ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO);
327
328 /*
329 * One should note that an interface must do multicast in order
330 * to support IPv6.
331 */
332 ifp = &sc->sc_ec.ec_if;
333 strcpy(ifp->if_xname, device_xname(self));
334 ifp->if_softc = sc;
335 int tmp = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
336 ifp->if_flags = tmp;
337 ifp->if_ioctl = tap_ioctl;
338 ifp->if_start = tap_start;
339 ifp->if_stop = tap_stop;
340 ifp->if_init = tap_init;
341 IFQ_SET_READY(&ifp->if_snd);
342
343 sc->sc_ec.ec_capabilities = ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
344
345 /* Those steps are mandatory for an Ethernet driver. */
346 if_initialize(ifp);
347 ether_ifattach(ifp, enaddr);
348 if_register(ifp);
349
350 #if defined(COMPAT_40) || defined(MODULAR)
351 /*
352 * Add a sysctl node for that interface.
353 *
354 * The pointer transmitted is not a string, but instead a pointer to
355 * the softc structure, which we can use to build the string value on
356 * the fly in the helper function of the node. See the comments for
357 * tap_sysctl_handler for details.
358 *
359 * Usually sysctl_createv is called with CTL_CREATE as the before-last
360 * component. However, we can allocate a number ourselves, as we are
361 * the only consumer of the net.link.<iface> node. In this case, the
362 * unit number is conveniently used to number the node. CTL_CREATE
363 * would just work, too.
364 */
365 if ((error = sysctl_createv(NULL, 0, NULL,
366 &node, CTLFLAG_READWRITE,
367 CTLTYPE_STRING, device_xname(self), NULL,
368 tap_sysctl_handler, 0, (void *)sc, 18,
369 CTL_NET, AF_LINK, tap_node, device_unit(sc->sc_dev),
370 CTL_EOL)) != 0)
371 aprint_error_dev(self, "sysctl_createv returned %d, ignoring\n",
372 error);
373 #endif
374 }
375
376 /*
377 * When detaching, we do the inverse of what is done in the attach
378 * routine, in reversed order.
379 */
380 static int
tap_detach(device_t self,int flags)381 tap_detach(device_t self, int flags)
382 {
383 struct tap_softc *sc = device_private(self);
384 struct ifnet *ifp = &sc->sc_ec.ec_if;
385 #if defined(COMPAT_40) || defined(MODULAR)
386 int error;
387 #endif
388 int s;
389
390 sc->sc_flags |= TAP_GOING;
391 s = splnet();
392 tap_stop(ifp, 1);
393 if_down(ifp);
394 splx(s);
395
396 if (sc->sc_sih != NULL) {
397 softint_disestablish(sc->sc_sih);
398 sc->sc_sih = NULL;
399 }
400
401 #if defined(COMPAT_40) || defined(MODULAR)
402 /*
403 * Destroying a single leaf is a very straightforward operation using
404 * sysctl_destroyv. One should be sure to always end the path with
405 * CTL_EOL.
406 */
407 if ((error = sysctl_destroyv(NULL, CTL_NET, AF_LINK, tap_node,
408 device_unit(sc->sc_dev), CTL_EOL)) != 0)
409 aprint_error_dev(self,
410 "sysctl_destroyv returned %d, ignoring\n", error);
411 #endif
412 ether_ifdetach(ifp);
413 if_detach(ifp);
414 ifmedia_delete_instance(&sc->sc_im, IFM_INST_ANY);
415 seldestroy(&sc->sc_rsel);
416 mutex_destroy(&sc->sc_rdlock);
417 mutex_destroy(&sc->sc_kqlock);
418
419 pmf_device_deregister(self);
420
421 return (0);
422 }
423
424 /*
425 * This function is called by the ifmedia layer to notify the driver
426 * that the user requested a media change. A real driver would
427 * reconfigure the hardware.
428 */
static int
tap_mediachange(struct ifnet *ifp)
{
	/* No hardware behind the interface, hence nothing to reconfigure. */
	return 0;
}
434
435 /*
436 * Here the user asks for the currently used media.
437 */
438 static void
tap_mediastatus(struct ifnet * ifp,struct ifmediareq * imr)439 tap_mediastatus(struct ifnet *ifp, struct ifmediareq *imr)
440 {
441 struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
442 imr->ifm_active = sc->sc_im.ifm_cur->ifm_media;
443 }
444
445 /*
446 * This is the function where we SEND packets.
447 *
448 * There is no 'receive' equivalent. A typical driver will get
449 * interrupts from the hardware, and from there will inject new packets
450 * into the network stack.
451 *
452 * Once handled, a packet must be freed. A real driver might not be able
453 * to fit all the pending packets into the hardware, and is allowed to
454 * return before having sent all the packets. It should then use the
455 * if_flags flag IFF_OACTIVE to notify the upper layer.
456 *
457 * There are also other flags one should check, such as IFF_PAUSE.
458 *
459 * It is our duty to make packets available to BPF listeners.
460 *
461 * You should be aware that this function is called by the Ethernet layer
462 * at splnet().
463 *
464 * When the device is opened, we have to pass the packet(s) to the
465 * userland. For that we stay in OACTIVE mode while the userland gets
466 * the packets, and we send a signal to the processes waiting to read.
467 *
468 * wakeup(sc) is the counterpart to the tsleep call in
469 * tap_dev_read, while selnotify() is used for kevent(2) and
470 * poll(2) (which includes select(2)) listeners.
471 */
472 static void
tap_start(struct ifnet * ifp)473 tap_start(struct ifnet *ifp)
474 {
475 struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
476 struct mbuf *m0;
477
478 if ((sc->sc_flags & TAP_INUSE) == 0) {
479 /* Simply drop packets */
480 for(;;) {
481 IFQ_DEQUEUE(&ifp->if_snd, m0);
482 if (m0 == NULL)
483 return;
484
485 ifp->if_opackets++;
486 bpf_mtap(ifp, m0);
487
488 m_freem(m0);
489 }
490 } else if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
491 ifp->if_flags |= IFF_OACTIVE;
492 wakeup(sc);
493 selnotify(&sc->sc_rsel, 0, 1);
494 if (sc->sc_flags & TAP_ASYNCIO)
495 softint_schedule(sc->sc_sih);
496 }
497 }
498
499 static void
tap_softintr(void * cookie)500 tap_softintr(void *cookie)
501 {
502 struct tap_softc *sc;
503 struct ifnet *ifp;
504 int a, b;
505
506 sc = cookie;
507
508 if (sc->sc_flags & TAP_ASYNCIO) {
509 ifp = &sc->sc_ec.ec_if;
510 if (ifp->if_flags & IFF_RUNNING) {
511 a = POLL_IN;
512 b = POLLIN|POLLRDNORM;
513 } else {
514 a = POLL_HUP;
515 b = 0;
516 }
517 fownsignal(sc->sc_pgid, SIGIO, a, b, NULL);
518 }
519 }
520
521 /*
522 * A typical driver will only contain the following handlers for
523 * ioctl calls, except SIOCSIFPHYADDR.
524 * The latter is a hack I used to set the Ethernet address of the
525 * faked device.
526 *
527 * Note that both ifmedia_ioctl() and ether_ioctl() have to be
528 * called under splnet().
529 */
530 static int
tap_ioctl(struct ifnet * ifp,u_long cmd,void * data)531 tap_ioctl(struct ifnet *ifp, u_long cmd, void *data)
532 {
533 struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
534 struct ifreq *ifr = (struct ifreq *)data;
535 int s, error;
536
537 s = splnet();
538
539 switch (cmd) {
540 #ifdef OSIOCSIFMEDIA
541 case OSIOCSIFMEDIA:
542 #endif
543 case SIOCSIFMEDIA:
544 case SIOCGIFMEDIA:
545 error = ifmedia_ioctl(ifp, ifr, &sc->sc_im, cmd);
546 break;
547 #if defined(COMPAT_40) || defined(MODULAR)
548 case SIOCSIFPHYADDR:
549 error = tap_lifaddr(ifp, cmd, (struct ifaliasreq *)data);
550 break;
551 #endif
552 default:
553 error = ether_ioctl(ifp, cmd, data);
554 if (error == ENETRESET)
555 error = 0;
556 break;
557 }
558
559 splx(s);
560
561 return (error);
562 }
563
564 #if defined(COMPAT_40) || defined(MODULAR)
565 /*
566 * Helper function to set Ethernet address. This has been replaced by
567 * the generic SIOCALIFADDR ioctl on a PF_LINK socket.
568 */
569 static int
tap_lifaddr(struct ifnet * ifp,u_long cmd,struct ifaliasreq * ifra)570 tap_lifaddr(struct ifnet *ifp, u_long cmd, struct ifaliasreq *ifra)
571 {
572 const struct sockaddr *sa = &ifra->ifra_addr;
573
574 if (sa->sa_family != AF_LINK)
575 return (EINVAL);
576
577 if_set_sadl(ifp, sa->sa_data, ETHER_ADDR_LEN, false);
578
579 return (0);
580 }
581 #endif
582
583 /*
584 * _init() would typically be called when an interface goes up,
585 * meaning it should configure itself into the state in which it
586 * can send packets.
587 */
588 static int
tap_init(struct ifnet * ifp)589 tap_init(struct ifnet *ifp)
590 {
591 ifp->if_flags |= IFF_RUNNING;
592
593 tap_start(ifp);
594
595 return (0);
596 }
597
598 /*
 * _stop() is called when an interface goes down. It is our
 * responsibility to validate that state by clearing the
 * IFF_RUNNING flag.
602 *
603 * We have to wake up all the sleeping processes to have the pending
604 * read requests cancelled.
605 */
606 static void
tap_stop(struct ifnet * ifp,int disable)607 tap_stop(struct ifnet *ifp, int disable)
608 {
609 struct tap_softc *sc = (struct tap_softc *)ifp->if_softc;
610
611 ifp->if_flags &= ~IFF_RUNNING;
612 wakeup(sc);
613 selnotify(&sc->sc_rsel, 0, 1);
614 if (sc->sc_flags & TAP_ASYNCIO)
615 softint_schedule(sc->sc_sih);
616 }
617
618 /*
619 * The 'create' command of ifconfig can be used to create
620 * any numbered instance of a given device. Thus we have to
621 * make sure we have enough room in cd_devs to create the
622 * user-specified instance. config_attach_pseudo will do this
623 * for us.
624 */
625 static int
tap_clone_create(struct if_clone * ifc,int unit)626 tap_clone_create(struct if_clone *ifc, int unit)
627 {
628 if (tap_clone_creator(unit) == NULL) {
629 aprint_error("%s%d: unable to attach an instance\n",
630 tap_cd.cd_name, unit);
631 return (ENXIO);
632 }
633
634 return (0);
635 }
636
637 /*
 * tap(4) can be cloned in two ways:
 * using 'ifconfig tap0 create', which will use the network
 * interface cloning API, and call tap_clone_create above.
 * opening the cloning device node, whose minor number is TAP_CLONER.
 * See below for an explanation of how this part works.
643 */
644 static struct tap_softc *
tap_clone_creator(int unit)645 tap_clone_creator(int unit)
646 {
647 struct cfdata *cf;
648
649 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
650 cf->cf_name = tap_cd.cd_name;
651 cf->cf_atname = tap_ca.ca_name;
652 if (unit == -1) {
653 /* let autoconf find the first free one */
654 cf->cf_unit = 0;
655 cf->cf_fstate = FSTATE_STAR;
656 } else {
657 cf->cf_unit = unit;
658 cf->cf_fstate = FSTATE_NOTFOUND;
659 }
660
661 return device_private(config_attach_pseudo(cf));
662 }
663
664 /*
665 * The clean design of if_clone and autoconf(9) makes that part
666 * really straightforward. The second argument of config_detach
667 * means neither QUIET nor FORCED.
668 */
669 static int
tap_clone_destroy(struct ifnet * ifp)670 tap_clone_destroy(struct ifnet *ifp)
671 {
672 struct tap_softc *sc = ifp->if_softc;
673
674 return tap_clone_destroyer(sc->sc_dev);
675 }
676
677 int
tap_clone_destroyer(device_t dev)678 tap_clone_destroyer(device_t dev)
679 {
680 cfdata_t cf = device_cfdata(dev);
681 int error;
682
683 if ((error = config_detach(dev, 0)) != 0)
684 aprint_error_dev(dev, "unable to detach instance\n");
685 free(cf, M_DEVBUF);
686
687 return (error);
688 }
689
690 /*
 * tap(4) is a bit of a hybrid device. It can be used in two different
 * ways:
693 * 1. ifconfig tapN create, then use /dev/tapN to read/write off it.
694 * 2. open /dev/tap, get a new interface created and read/write off it.
695 * That interface is destroyed when the process that had it created exits.
696 *
697 * The first way is managed by the cdevsw structure, and you access interfaces
698 * through a (major, minor) mapping: tap4 is obtained by the minor number
699 * 4. The entry points for the cdevsw interface are prefixed by tap_cdev_.
700 *
 * The second way is the so-called "cloning" device. It's a special minor
 * number (chosen as the maximal number, to allow as many tap devices as
 * possible). The user first opens the cloner (e.g., /dev/tap), and that
 * call ends in tap_cdev_open. The actual place where it is handled is
 * tap_dev_cloner.
 *
 * A tap device cannot be opened more than once at a time, so the cdevsw
 * part of open() does nothing but note that the interface is being used and
 * hence ready to actually handle packets.
710 */
711
712 static int
tap_cdev_open(dev_t dev,int flags,int fmt,struct lwp * l)713 tap_cdev_open(dev_t dev, int flags, int fmt, struct lwp *l)
714 {
715 struct tap_softc *sc;
716
717 if (minor(dev) == TAP_CLONER)
718 return tap_dev_cloner(l);
719
720 sc = device_lookup_private(&tap_cd, minor(dev));
721 if (sc == NULL)
722 return (ENXIO);
723
724 /* The device can only be opened once */
725 if (sc->sc_flags & TAP_INUSE)
726 return (EBUSY);
727 sc->sc_flags |= TAP_INUSE;
728 return (0);
729 }
730
731 /*
732 * There are several kinds of cloning devices, and the most simple is the one
733 * tap(4) uses. What it does is change the file descriptor with a new one,
734 * with its own fileops structure (which maps to the various read, write,
735 * ioctl functions). It starts allocating a new file descriptor with falloc,
736 * then actually creates the new tap devices.
737 *
 * Once those two steps are successful, we can re-wire the existing file
 * descriptor to its new self. This is done with fd_clone(): it fills the fp
 * structure as needed (notably f_devunit gets filled with the fifth parameter
 * passed, the unit of the tap device, which will allow us to identify the
 * device later), and returns EMOVEFD.
743 *
744 * That magic value is interpreted by sys_open() which then replaces the
745 * current file descriptor by the new one (through a magic member of struct
746 * lwp, l_dupfd).
747 *
748 * The tap device is flagged as being busy since it otherwise could be
749 * externally accessed through the corresponding device node with the cdevsw
750 * interface.
751 */
752
753 static int
tap_dev_cloner(struct lwp * l)754 tap_dev_cloner(struct lwp *l)
755 {
756 struct tap_softc *sc;
757 file_t *fp;
758 int error, fd;
759
760 if ((error = fd_allocfile(&fp, &fd)) != 0)
761 return (error);
762
763 if ((sc = tap_clone_creator(-1)) == NULL) {
764 fd_abort(curproc, fp, fd);
765 return (ENXIO);
766 }
767
768 sc->sc_flags |= TAP_INUSE;
769
770 return fd_clone(fp, fd, FREAD|FWRITE, &tap_fileops,
771 (void *)(intptr_t)device_unit(sc->sc_dev));
772 }
773
774 /*
775 * While all other operations (read, write, ioctl, poll and kqfilter) are
776 * really the same whether we are in cdevsw or fileops mode, the close()
777 * function is slightly different in the two cases.
778 *
 * As for the others, the core of it is shared in tap_dev_close. What
 * it does is sufficient for the cdevsw interface, but the cloning interface
 * needs another thing: the interface is destroyed when the process that
 * created it closes it.
783 */
784 static int
tap_cdev_close(dev_t dev,int flags,int fmt,struct lwp * l)785 tap_cdev_close(dev_t dev, int flags, int fmt,
786 struct lwp *l)
787 {
788 struct tap_softc *sc =
789 device_lookup_private(&tap_cd, minor(dev));
790
791 if (sc == NULL)
792 return (ENXIO);
793
794 return tap_dev_close(sc);
795 }
796
797 /*
 * It might happen that the administrator used ifconfig to externally destroy
 * the interface. In that case, tap_fops_close will be called while
 * tap_detach is already happening. If we called it again from here, we
 * would deadlock. TAP_GOING ensures that this situation doesn't happen.
802 */
803 static int
tap_fops_close(file_t * fp)804 tap_fops_close(file_t *fp)
805 {
806 int unit = fp->f_devunit;
807 struct tap_softc *sc;
808 int error;
809
810 sc = device_lookup_private(&tap_cd, unit);
811 if (sc == NULL)
812 return (ENXIO);
813
814 /* tap_dev_close currently always succeeds, but it might not
815 * always be the case. */
816 KERNEL_LOCK(1, NULL);
817 if ((error = tap_dev_close(sc)) != 0) {
818 KERNEL_UNLOCK_ONE(NULL);
819 return (error);
820 }
821
822 /* Destroy the device now that it is no longer useful,
823 * unless it's already being destroyed. */
824 if ((sc->sc_flags & TAP_GOING) != 0) {
825 KERNEL_UNLOCK_ONE(NULL);
826 return (0);
827 }
828
829 error = tap_clone_destroyer(sc->sc_dev);
830 KERNEL_UNLOCK_ONE(NULL);
831 return error;
832 }
833
834 static int
tap_dev_close(struct tap_softc * sc)835 tap_dev_close(struct tap_softc *sc)
836 {
837 struct ifnet *ifp;
838 int s;
839
840 s = splnet();
841 /* Let tap_start handle packets again */
842 ifp = &sc->sc_ec.ec_if;
843 ifp->if_flags &= ~IFF_OACTIVE;
844
845 /* Purge output queue */
846 if (!(IFQ_IS_EMPTY(&ifp->if_snd))) {
847 struct mbuf *m;
848
849 for (;;) {
850 IFQ_DEQUEUE(&ifp->if_snd, m);
851 if (m == NULL)
852 break;
853
854 ifp->if_opackets++;
855 bpf_mtap(ifp, m);
856 m_freem(m);
857 }
858 }
859 splx(s);
860
861 if (sc->sc_sih != NULL) {
862 softint_disestablish(sc->sc_sih);
863 sc->sc_sih = NULL;
864 }
865 sc->sc_flags &= ~(TAP_INUSE | TAP_ASYNCIO);
866
867 return (0);
868 }
869
870 static int
tap_cdev_read(dev_t dev,struct uio * uio,int flags)871 tap_cdev_read(dev_t dev, struct uio *uio, int flags)
872 {
873 return tap_dev_read(minor(dev), uio, flags);
874 }
875
876 static int
tap_fops_read(file_t * fp,off_t * offp,struct uio * uio,kauth_cred_t cred,int flags)877 tap_fops_read(file_t *fp, off_t *offp, struct uio *uio,
878 kauth_cred_t cred, int flags)
879 {
880 int error;
881
882 KERNEL_LOCK(1, NULL);
883 error = tap_dev_read(fp->f_devunit, uio, flags);
884 KERNEL_UNLOCK_ONE(NULL);
885 return error;
886 }
887
888 static int
tap_dev_read(int unit,struct uio * uio,int flags)889 tap_dev_read(int unit, struct uio *uio, int flags)
890 {
891 struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
892 struct ifnet *ifp;
893 struct mbuf *m, *n;
894 int error = 0, s;
895
896 if (sc == NULL)
897 return (ENXIO);
898
899 getnanotime(&sc->sc_atime);
900
901 ifp = &sc->sc_ec.ec_if;
902 if ((ifp->if_flags & IFF_UP) == 0)
903 return (EHOSTDOWN);
904
905 /*
906 * In the TAP_NBIO case, we have to make sure we won't be sleeping
907 */
908 if ((sc->sc_flags & TAP_NBIO) != 0) {
909 if (!mutex_tryenter(&sc->sc_rdlock))
910 return (EWOULDBLOCK);
911 } else {
912 mutex_enter(&sc->sc_rdlock);
913 }
914
915 s = splnet();
916 if (IFQ_IS_EMPTY(&ifp->if_snd)) {
917 ifp->if_flags &= ~IFF_OACTIVE;
918 /*
919 * We must release the lock before sleeping, and re-acquire it
920 * after.
921 */
922 mutex_exit(&sc->sc_rdlock);
923 if (sc->sc_flags & TAP_NBIO)
924 error = EWOULDBLOCK;
925 else
926 error = tsleep(sc, PSOCK|PCATCH, "tap", 0);
927 splx(s);
928
929 if (error != 0)
930 return (error);
931 /* The device might have been downed */
932 if ((ifp->if_flags & IFF_UP) == 0)
933 return (EHOSTDOWN);
934 if ((sc->sc_flags & TAP_NBIO)) {
935 if (!mutex_tryenter(&sc->sc_rdlock))
936 return (EWOULDBLOCK);
937 } else {
938 mutex_enter(&sc->sc_rdlock);
939 }
940 s = splnet();
941 }
942
943 IFQ_DEQUEUE(&ifp->if_snd, m);
944 ifp->if_flags &= ~IFF_OACTIVE;
945 splx(s);
946 if (m == NULL) {
947 error = 0;
948 goto out;
949 }
950
951 ifp->if_opackets++;
952 bpf_mtap(ifp, m);
953
954 /*
955 * One read is one packet.
956 */
957 do {
958 error = uiomove(mtod(m, void *),
959 min(m->m_len, uio->uio_resid), uio);
960 MFREE(m, n);
961 m = n;
962 } while (m != NULL && uio->uio_resid > 0 && error == 0);
963
964 if (m != NULL)
965 m_freem(m);
966
967 out:
968 mutex_exit(&sc->sc_rdlock);
969 return (error);
970 }
971
972 static int
tap_fops_stat(file_t * fp,struct stat * st)973 tap_fops_stat(file_t *fp, struct stat *st)
974 {
975 int error = 0;
976 struct tap_softc *sc;
977 int unit = fp->f_devunit;
978
979 (void)memset(st, 0, sizeof(*st));
980
981 KERNEL_LOCK(1, NULL);
982 sc = device_lookup_private(&tap_cd, unit);
983 if (sc == NULL) {
984 error = ENXIO;
985 goto out;
986 }
987
988 st->st_dev = makedev(cdevsw_lookup_major(&tap_cdevsw), unit);
989 st->st_atimespec = sc->sc_atime;
990 st->st_mtimespec = sc->sc_mtime;
991 st->st_ctimespec = st->st_birthtimespec = sc->sc_btime;
992 st->st_uid = kauth_cred_geteuid(fp->f_cred);
993 st->st_gid = kauth_cred_getegid(fp->f_cred);
994 out:
995 KERNEL_UNLOCK_ONE(NULL);
996 return error;
997 }
998
999 static int
tap_cdev_write(dev_t dev,struct uio * uio,int flags)1000 tap_cdev_write(dev_t dev, struct uio *uio, int flags)
1001 {
1002 return tap_dev_write(minor(dev), uio, flags);
1003 }
1004
1005 static int
tap_fops_write(file_t * fp,off_t * offp,struct uio * uio,kauth_cred_t cred,int flags)1006 tap_fops_write(file_t *fp, off_t *offp, struct uio *uio,
1007 kauth_cred_t cred, int flags)
1008 {
1009 int error;
1010
1011 KERNEL_LOCK(1, NULL);
1012 error = tap_dev_write(fp->f_devunit, uio, flags);
1013 KERNEL_UNLOCK_ONE(NULL);
1014 return error;
1015 }
1016
1017 static int
tap_dev_write(int unit,struct uio * uio,int flags)1018 tap_dev_write(int unit, struct uio *uio, int flags)
1019 {
1020 struct tap_softc *sc =
1021 device_lookup_private(&tap_cd, unit);
1022 struct ifnet *ifp;
1023 struct mbuf *m, **mp;
1024 int error = 0;
1025 int s;
1026
1027 if (sc == NULL)
1028 return (ENXIO);
1029
1030 getnanotime(&sc->sc_mtime);
1031 ifp = &sc->sc_ec.ec_if;
1032
1033 /* One write, one packet, that's the rule */
1034 MGETHDR(m, M_DONTWAIT, MT_DATA);
1035 if (m == NULL) {
1036 ifp->if_ierrors++;
1037 return (ENOBUFS);
1038 }
1039 m->m_pkthdr.len = uio->uio_resid;
1040
1041 mp = &m;
1042 while (error == 0 && uio->uio_resid > 0) {
1043 if (*mp != m) {
1044 MGET(*mp, M_DONTWAIT, MT_DATA);
1045 if (*mp == NULL) {
1046 error = ENOBUFS;
1047 break;
1048 }
1049 }
1050 (*mp)->m_len = min(MHLEN, uio->uio_resid);
1051 error = uiomove(mtod(*mp, void *), (*mp)->m_len, uio);
1052 mp = &(*mp)->m_next;
1053 }
1054 if (error) {
1055 ifp->if_ierrors++;
1056 m_freem(m);
1057 return (error);
1058 }
1059
1060 ifp->if_ipackets++;
1061 m_set_rcvif(m, ifp);
1062
1063 bpf_mtap(ifp, m);
1064 s = splnet();
1065 if_input(ifp, m);
1066 splx(s);
1067
1068 return (0);
1069 }
1070
1071 static int
tap_cdev_ioctl(dev_t dev,u_long cmd,void * data,int flags,struct lwp * l)1072 tap_cdev_ioctl(dev_t dev, u_long cmd, void *data, int flags,
1073 struct lwp *l)
1074 {
1075 return tap_dev_ioctl(minor(dev), cmd, data, l);
1076 }
1077
1078 static int
tap_fops_ioctl(file_t * fp,u_long cmd,void * data)1079 tap_fops_ioctl(file_t *fp, u_long cmd, void *data)
1080 {
1081 return tap_dev_ioctl(fp->f_devunit, cmd, data, curlwp);
1082 }
1083
1084 static int
tap_dev_ioctl(int unit,u_long cmd,void * data,struct lwp * l)1085 tap_dev_ioctl(int unit, u_long cmd, void *data, struct lwp *l)
1086 {
1087 struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
1088
1089 if (sc == NULL)
1090 return ENXIO;
1091
1092 switch (cmd) {
1093 case FIONREAD:
1094 {
1095 struct ifnet *ifp = &sc->sc_ec.ec_if;
1096 struct mbuf *m;
1097 int s;
1098
1099 s = splnet();
1100 IFQ_POLL(&ifp->if_snd, m);
1101
1102 if (m == NULL)
1103 *(int *)data = 0;
1104 else
1105 *(int *)data = m->m_pkthdr.len;
1106 splx(s);
1107 return 0;
1108 }
1109 case TIOCSPGRP:
1110 case FIOSETOWN:
1111 return fsetown(&sc->sc_pgid, cmd, data);
1112 case TIOCGPGRP:
1113 case FIOGETOWN:
1114 return fgetown(sc->sc_pgid, cmd, data);
1115 case FIOASYNC:
1116 if (*(int *)data) {
1117 if (sc->sc_sih == NULL) {
1118 sc->sc_sih = softint_establish(SOFTINT_CLOCK,
1119 tap_softintr, sc);
1120 if (sc->sc_sih == NULL)
1121 return EBUSY; /* XXX */
1122 }
1123 sc->sc_flags |= TAP_ASYNCIO;
1124 } else {
1125 sc->sc_flags &= ~TAP_ASYNCIO;
1126 if (sc->sc_sih != NULL) {
1127 softint_disestablish(sc->sc_sih);
1128 sc->sc_sih = NULL;
1129 }
1130 }
1131 return 0;
1132 case FIONBIO:
1133 if (*(int *)data)
1134 sc->sc_flags |= TAP_NBIO;
1135 else
1136 sc->sc_flags &= ~TAP_NBIO;
1137 return 0;
1138 #ifdef OTAPGIFNAME
1139 case OTAPGIFNAME:
1140 #endif
1141 case TAPGIFNAME:
1142 {
1143 struct ifreq *ifr = (struct ifreq *)data;
1144 struct ifnet *ifp = &sc->sc_ec.ec_if;
1145
1146 strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
1147 return 0;
1148 }
1149 default:
1150 return ENOTTY;
1151 }
1152 }
1153
1154 static int
tap_cdev_poll(dev_t dev,int events,struct lwp * l)1155 tap_cdev_poll(dev_t dev, int events, struct lwp *l)
1156 {
1157 return tap_dev_poll(minor(dev), events, l);
1158 }
1159
1160 static int
tap_fops_poll(file_t * fp,int events)1161 tap_fops_poll(file_t *fp, int events)
1162 {
1163 return tap_dev_poll(fp->f_devunit, events, curlwp);
1164 }
1165
1166 static int
tap_dev_poll(int unit,int events,struct lwp * l)1167 tap_dev_poll(int unit, int events, struct lwp *l)
1168 {
1169 struct tap_softc *sc =
1170 device_lookup_private(&tap_cd, unit);
1171 int revents = 0;
1172
1173 if (sc == NULL)
1174 return POLLERR;
1175
1176 if (events & (POLLIN|POLLRDNORM)) {
1177 struct ifnet *ifp = &sc->sc_ec.ec_if;
1178 struct mbuf *m;
1179 int s;
1180
1181 s = splnet();
1182 IFQ_POLL(&ifp->if_snd, m);
1183
1184 if (m != NULL)
1185 revents |= events & (POLLIN|POLLRDNORM);
1186 else {
1187 mutex_spin_enter(&sc->sc_kqlock);
1188 selrecord(l, &sc->sc_rsel);
1189 mutex_spin_exit(&sc->sc_kqlock);
1190 }
1191 splx(s);
1192 }
1193 revents |= events & (POLLOUT|POLLWRNORM);
1194
1195 return (revents);
1196 }
1197
1198 static struct filterops tap_read_filterops = { 1, NULL, tap_kqdetach,
1199 tap_kqread };
1200 static struct filterops tap_seltrue_filterops = { 1, NULL, tap_kqdetach,
1201 filt_seltrue };
1202
1203 static int
tap_cdev_kqfilter(dev_t dev,struct knote * kn)1204 tap_cdev_kqfilter(dev_t dev, struct knote *kn)
1205 {
1206 return tap_dev_kqfilter(minor(dev), kn);
1207 }
1208
1209 static int
tap_fops_kqfilter(file_t * fp,struct knote * kn)1210 tap_fops_kqfilter(file_t *fp, struct knote *kn)
1211 {
1212 return tap_dev_kqfilter(fp->f_devunit, kn);
1213 }
1214
1215 static int
tap_dev_kqfilter(int unit,struct knote * kn)1216 tap_dev_kqfilter(int unit, struct knote *kn)
1217 {
1218 struct tap_softc *sc =
1219 device_lookup_private(&tap_cd, unit);
1220
1221 if (sc == NULL)
1222 return (ENXIO);
1223
1224 KERNEL_LOCK(1, NULL);
1225 switch(kn->kn_filter) {
1226 case EVFILT_READ:
1227 kn->kn_fop = &tap_read_filterops;
1228 break;
1229 case EVFILT_WRITE:
1230 kn->kn_fop = &tap_seltrue_filterops;
1231 break;
1232 default:
1233 KERNEL_UNLOCK_ONE(NULL);
1234 return (EINVAL);
1235 }
1236
1237 kn->kn_hook = sc;
1238 mutex_spin_enter(&sc->sc_kqlock);
1239 SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext);
1240 mutex_spin_exit(&sc->sc_kqlock);
1241 KERNEL_UNLOCK_ONE(NULL);
1242 return (0);
1243 }
1244
1245 static void
tap_kqdetach(struct knote * kn)1246 tap_kqdetach(struct knote *kn)
1247 {
1248 struct tap_softc *sc = (struct tap_softc *)kn->kn_hook;
1249
1250 KERNEL_LOCK(1, NULL);
1251 mutex_spin_enter(&sc->sc_kqlock);
1252 SLIST_REMOVE(&sc->sc_rsel.sel_klist, kn, knote, kn_selnext);
1253 mutex_spin_exit(&sc->sc_kqlock);
1254 KERNEL_UNLOCK_ONE(NULL);
1255 }
1256
1257 static int
tap_kqread(struct knote * kn,long hint)1258 tap_kqread(struct knote *kn, long hint)
1259 {
1260 struct tap_softc *sc = (struct tap_softc *)kn->kn_hook;
1261 struct ifnet *ifp = &sc->sc_ec.ec_if;
1262 struct mbuf *m;
1263 int s, rv;
1264
1265 KERNEL_LOCK(1, NULL);
1266 s = splnet();
1267 IFQ_POLL(&ifp->if_snd, m);
1268
1269 if (m == NULL)
1270 kn->kn_data = 0;
1271 else
1272 kn->kn_data = m->m_pkthdr.len;
1273 splx(s);
1274 rv = (kn->kn_data != 0 ? 1 : 0);
1275 KERNEL_UNLOCK_ONE(NULL);
1276 return rv;
1277 }
1278
1279 #if defined(COMPAT_40) || defined(MODULAR)
/*
 * sysctl management routines
 * You can set the address of an interface through:
 * net.link.tap.tap<number>
 *
 * Note the consistent use of tap_log in order to use
 * sysctl_teardown at unload time.
 *
 * In the kernel you will find a lot of SYSCTL_SETUP blocks.  Those
 * blocks register a function in a special section of the kernel
 * (called a link set) which is used at init_sysctl() time to cycle
 * through all those functions to create the kernel's sysctl tree.
 *
 * It is not possible to use link sets in a module, so the
 * easiest approach is to simply call our own setup routine at load
 * time.
 *
 * In the SYSCTL_SETUP blocks you find in the kernel, nodes have the
 * CTLFLAG_PERMANENT flag, meaning they cannot be removed.  Once the
 * whole kernel sysctl tree is built, it is not possible to add any
 * permanent node.
 *
 * It should be noted that we're not saving the sysctlnode pointer
 * we are returned when creating the "tap" node.  That structure
 * cannot be trusted once out of the calling function, as it might
 * get reused.  So we just save the MIB number, and always give the
 * full path starting from the root for later calls to sysctl_createv
 * and sysctl_destroyv.
 */
1308 SYSCTL_SETUP(sysctl_tap_setup, "sysctl net.link.tap subtree setup")
1309 {
1310 const struct sysctlnode *node;
1311 int error = 0;
1312
1313 if ((error = sysctl_createv(clog, 0, NULL, NULL,
1314 CTLFLAG_PERMANENT,
1315 CTLTYPE_NODE, "link", NULL,
1316 NULL, 0, NULL, 0,
1317 CTL_NET, AF_LINK, CTL_EOL)) != 0)
1318 return;
1319
1320 /*
1321 * The first four parameters of sysctl_createv are for management.
1322 *
1323 * The four that follows, here starting with a '0' for the flags,
1324 * describe the node.
1325 *
1326 * The next series of four set its value, through various possible
1327 * means.
1328 *
1329 * Last but not least, the path to the node is described. That path
1330 * is relative to the given root (third argument). Here we're
1331 * starting from the root.
1332 */
1333 if ((error = sysctl_createv(clog, 0, NULL, &node,
1334 CTLFLAG_PERMANENT,
1335 CTLTYPE_NODE, "tap", NULL,
1336 NULL, 0, NULL, 0,
1337 CTL_NET, AF_LINK, CTL_CREATE, CTL_EOL)) != 0)
1338 return;
1339 tap_node = node->sysctl_num;
1340 }
1341
/*
 * The helper functions make Andrew Brown's interface really
 * shine.  They make it possible to create the value on the fly,
 * whether the sysctl value is being read or written.
 *
 * As shown in the example in the man page, the first step is to
 * create a copy of the node to have sysctl_lookup work on it.
 *
 * Here, we have more work to do than just a copy, since we have
 * to create the string.  The first step is to collect the actual
 * value of the node, which is a convenient pointer to the softc
 * of the interface.  From there we create the string and use it
 * as the value, but only for the *copy* of the node.
 *
 * Then we let sysctl_lookup do the magic, which consists of
 * setting oldp and newp as required by the operation.  When the
 * value is read, that means that the string will be copied to
 * the user, and when it is written, the new value will be copied
 * over in the addr array.
 *
 * If newp is NULL, the user was reading the value, so we don't
 * have anything else to do.  If a new value was written, we
 * have to check it.
 *
 * If it is incorrect, we can return an error and leave 'node' as
 * it is: since it is a copy of the actual node, the change will
 * be forgotten.
 *
 * Upon a correct input, we commit the change to the ifnet
 * structure of our interface.
 */
1373 static int
tap_sysctl_handler(SYSCTLFN_ARGS)1374 tap_sysctl_handler(SYSCTLFN_ARGS)
1375 {
1376 struct sysctlnode node;
1377 struct tap_softc *sc;
1378 struct ifnet *ifp;
1379 int error;
1380 size_t len;
1381 char addr[3 * ETHER_ADDR_LEN];
1382 uint8_t enaddr[ETHER_ADDR_LEN];
1383
1384 node = *rnode;
1385 sc = node.sysctl_data;
1386 ifp = &sc->sc_ec.ec_if;
1387 (void)ether_snprintf(addr, sizeof(addr), CLLADDR(ifp->if_sadl));
1388 node.sysctl_data = addr;
1389 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1390 if (error || newp == NULL)
1391 return (error);
1392
1393 len = strlen(addr);
1394 if (len < 11 || len > 17)
1395 return (EINVAL);
1396
1397 /* Commit change */
1398 if (ether_aton_r(enaddr, sizeof(enaddr), addr) != 0)
1399 return (EINVAL);
1400 if_set_sadl(ifp, enaddr, ETHER_ADDR_LEN, false);
1401 return (error);
1402 }
1403 #endif
1404