1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/param.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/proc.h>
40 #include <sys/serialize.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/sysctl.h>
44
45 #include <machine/md_var.h>
46 #include <machine/cothread.h>
47
48 #include <net/ethernet.h>
49 #include <net/if.h>
50 #include <net/bpf.h>
51 #include <net/if_arp.h>
52 #include <net/if_media.h>
53 #include <net/ifq_var.h>
54 #include <net/vlan/if_vlan_ether.h>
55
56 #include <netinet/in_var.h>
57
58 #include <sys/stat.h>
59 #include <net/tap/if_tap.h>
60 #include <err.h>
61 #include <errno.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <fcntl.h>
66
67 #define VKE_DEVNAME "vke"
68
69 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */
70
71 #define NETFIFOINDEX(u, sc) ((u) & ((sc)->sc_ringsize - 1))
72
73 #define VKE_COTD_RUN 0
74 #define VKE_COTD_EXIT 1
75 #define VKE_COTD_DEAD 2
76
/*
 * Lock-free single-producer/single-consumer mbuf ring.
 *
 * Indices increase monotonically and are masked with NETFIFOINDEX()
 * at each access; cross-thread visibility between the kernel side and
 * the cothread side is handled with cpu_sfence()/cpu_lfence() at the
 * access sites, not here.
 */
struct vke_fifo {
	struct mbuf **array;	/* sc_ringsize mbuf pointer slots */
	int rindex;		/* consumer (read) index */
	int windex;		/* producer (write) index */
};
typedef struct vke_fifo *fifo_t;
83
84 /* Default value for a long time */
85 #define VKE_DEFAULT_RINGSIZE 256
86 static int vke_max_ringsize = 0;
87 TUNABLE_INT("hw.vke.max_ringsize", &vke_max_ringsize);
88
89 #define LOW_POW_2(n) (1 << (fls(n) - 1))
90
/*
 * Per-interface softc.  One tx and one rx cothread run outside the
 * kernel scheduler and communicate with the kernel exclusively through
 * the three fifos and the cotd_* handshake/statistics fields.
 */
struct vke_softc {
	struct arpcom arpcom;		/* common ethernet/ifnet state */
	int sc_fd;			/* fd of backing tap(4)/socket */
	int sc_unit;			/* vke unit number */

	cothread_t cotd_tx;		/* transmit cothread */
	cothread_t cotd_rx;		/* receive cothread */

	int cotd_tx_exit;		/* VKE_COTD_{RUN,EXIT,DEAD} handshake */
	int cotd_rx_exit;		/* VKE_COTD_{RUN,EXIT,DEAD} handshake */

	void *sc_txbuf;			/* MCLBYTES staging buffer for write(2) */
	int sc_txbuf_len;		/* valid bytes in sc_txbuf */

	fifo_t sc_txfifo;		/* kernel -> tx cothread */
	fifo_t sc_txfifo_done;		/* tx cothread -> kernel (free list) */
	fifo_t sc_rxfifo;		/* rx cothread -> kernel */

	int sc_ringsize;		/* power-of-2 size of each fifo */

	/*
	 * Statistics accumulated by the cothreads (which cannot touch
	 * per-cpu ifnet counters) and folded into the ifnet from the
	 * interrupt handlers.
	 */
	long cotd_ipackets;
	long cotd_oerrors;
	long cotd_opackets;

	struct sysctl_ctx_list sc_sysctl_ctx;
	struct sysctl_oid *sc_sysctl_tree;

	int sc_tap_unit;	/* unit of backend tap(4) */
	in_addr_t sc_addr;	/* address */
	in_addr_t sc_mask;	/* netmask */

	struct ifmedia sc_media;	/* reported media types */
};
124
125 static void vke_start(struct ifnet *, struct ifaltq_subque *);
126 static void vke_init(void *);
127 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
128
129 static int vke_media_change(struct ifnet *);
130 static void vke_media_status(struct ifnet *, struct ifmediareq *);
131
132 static int vke_attach(const struct vknetif_info *, int);
133 static int vke_stop(struct vke_softc *);
134 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
135 static void vke_tx_intr(cothread_t cotd);
136 static void vke_tx_thread(cothread_t cotd);
137 static void vke_rx_intr(cothread_t cotd);
138 static void vke_rx_thread(cothread_t cotd);
139
140 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
141 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
142
143 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
144 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
145
146 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
147 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
148
149 static void
vke_sysinit(void * arg __unused)150 vke_sysinit(void *arg __unused)
151 {
152 int i, unit;
153
154 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));
155
156 unit = 0;
157 for (i = 0; i < NetifNum; ++i) {
158 if (vke_attach(&NetifInfo[i], unit) == 0)
159 ++unit;
160 }
161 }
162 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
163
164 /*
165 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since
166 * the cothread cannot free transmit mbufs after processing we put them on
167 * the done fifo so the kernel can free them.
168 */
/*
 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.  Since
 * the cothread cannot free transmit mbufs after processing we put them on
 * the done fifo so the kernel can free them.
 *
 * Runs in the tx cothread (producer side).  Always succeeds, returns 0.
 */
static int
vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo_done;

	/*
	 * Busy-wait while the ring is full (advancing windex would
	 * collide with rindex).  usleep(3) rather than a kernel sleep
	 * primitive is intentional: this is cothread context.
	 */
	while (NETFIFOINDEX(fifo->windex + 1, sc) ==
	       NETFIFOINDEX(fifo->rindex, sc)) {
		usleep(20000);
	}
	/* Publish the mbuf before making the new write index visible */
	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}
184
185 /*
186 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
187 */
/*
 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
 *
 * Kernel-side consumer.  The consumed slot is replaced with nm (callers
 * currently pass NULL).  Returns NULL when the fifo is empty.
 */
static struct mbuf *
vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
{
	fifo_t fifo = sc->sc_txfifo_done;
	struct mbuf *m;

	/* Empty when read and write indices coincide */
	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	/* Pair with the producer's cpu_sfence() before reading the slot */
	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = nm;
	++fifo->rindex;

	return (m);
}
204
205 /*
206 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
207 */
/*
 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
 *
 * Kernel-side producer (called from vke_start()).  Non-blocking:
 * returns 0 on success or -1 when the fifo is full, in which case the
 * caller retains ownership of the mbuf.
 */
static int
vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo;

	/* Full when advancing windex would collide with rindex */
	if (NETFIFOINDEX(fifo->windex + 1, sc) ==
	    NETFIFOINDEX(fifo->rindex, sc)) {
		return (-1);
	}

	/* Publish the mbuf before making the new write index visible */
	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}
224
225 /*
226 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
227 * exists.
228 */
/*
 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
 * exists.
 *
 * Consumer side, run by the tx cothread.  The slot is cleared to NULL
 * and flushed before the read index is advanced.
 */
static struct mbuf *
vke_txfifo_dequeue(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	/* Pair with the producer's cpu_sfence() before reading the slot */
	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = NULL;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}
246
247 static int
vke_txfifo_empty(struct vke_softc * sc)248 vke_txfifo_empty(struct vke_softc *sc)
249 {
250 fifo_t fifo = sc->sc_txfifo;
251
252 if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
253 return (1);
254 return(0);
255 }
256
257 /*
258 * vke_rxfifo_dequeue() - Return next mbuf on the receice fifo if one
259 * exists replacing it with newm which should point to a newly allocated
260 * mbuf.
261 */
/*
 * vke_rxfifo_dequeue() - Return next mbuf on the receice fifo if one
 * exists replacing it with newm which should point to a newly allocated
 * mbuf.
 *
 * Kernel-side consumer (called from vke_rx_intr()).  Returns NULL when
 * the fifo is empty.
 */
static struct mbuf *
vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	/* Pair with the producer's cpu_sfence() before reading the slot */
	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	/* Refill the slot so the rx cothread always finds an empty mbuf */
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = newm;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}
279
280 /*
281 * Return the next mbuf if available but do NOT remove it from the FIFO.
282 */
/*
 * Return the next mbuf if available but do NOT remove it from the FIFO.
 *
 * Used by vke_rx_intr() to peek before committing to a dequeue (which
 * requires a replacement cluster).  Returns NULL when the fifo is empty.
 */
static struct mbuf *
vke_rxfifo_sniff(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	/* Pair with the producer's cpu_sfence() before reading the slot */
	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];

	return (m);
}
297
/*
 * Bring the interface up (ifnet if_init / SIOCSIFFLAGS path).
 *
 * Stops any previous incarnation, allocates the three mbuf fifos,
 * preloads the rx ring with empty clusters, starts the tx/rx cothreads
 * and finally applies the address/netmask handed in on the vkernel
 * command line (one-shot; cleared afterwards).
 *
 * Called with the ifnet serializer held.
 */
static void
vke_init(void *xsc)
{
	struct vke_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	size_t ringsize = sc->sc_ringsize * sizeof(struct mbuf *);
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	/* Tear down any previous cothreads/fifos first */
	vke_stop(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	/*
	 * Allocate memory for FIFO structures and mbufs.
	 */
	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* The rx ring starts fully loaded with empty clusters */
	for (i = 0; i < sc->sc_ringsize; i++) {
		sc->sc_rxfifo->array[i] = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
		sc->sc_txfifo->array[i] = NULL;
		sc->sc_txfifo_done->array[i] = NULL;
	}

	/* Handshake flags must be RUN before the cothreads start */
	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");

	if (sc->sc_addr != 0) {
		in_addr_t addr, mask;

		addr = sc->sc_addr;
		mask = sc->sc_mask;

		/*
		 * Make sure vkernel assigned
		 * address will not be added
		 * again.
		 */
		sc->sc_addr = 0;
		sc->sc_mask = 0;

		vke_init_addr(ifp, addr, mask);
	}

}
357
358 /*
359 * Called from kernel.
360 *
361 * NOTE: We can't make any kernel callbacks while holding cothread lock
362 * because the cothread lock is not governed by the kernel scheduler
363 * (so mplock, tokens, etc will not be released).
364 */
/*
 * Transmit start routine, called from kernel.  Moves packets from the
 * ifnet send queue onto the tx fifo and signals the tx cothread in
 * VKE_CHUNK batches.  Packets that do not fit in the fifo are dropped.
 *
 * NOTE: We can't make any kernel callbacks while holding cothread lock
 * because the cothread lock is not governed by the kernel scheduler
 * (so mplock, tokens, etc will not be released).
 */
static void
vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct vke_softc *sc = ifp->if_softc;
	struct mbuf *m;
	cothread_t cotd = sc->cotd_tx;
	int count;

	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ASSERT_SERIALIZED(ifp->if_serializer);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	count = 0;
	while ((m = ifsq_dequeue(ifsq)) != NULL) {
		if (vke_txfifo_enqueue(sc, m) != -1) {
			ETHER_BPF_MTAP(ifp, m);
			/* Batch signals to amortize the lock cost */
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			/* tx fifo full -- drop the packet */
			m_freem(m);
		}
	}
	/* Signal any residual packets not covered by a full chunk */
	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
}
399
/*
 * Interface ioctl handler.  Handles up/down transitions, media queries
 * and status reporting; everything else falls through to ether_ioctl().
 *
 * Called with the ifnet serializer held.  Returns 0 or an errno.
 */
static int
vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct vke_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case SIOCSIFFLAGS:
		/* Track the administrative UP flag with init/stop */
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vke_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vke_stop(sc);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFSTATUS: {
		struct ifstat *ifs = (struct ifstat *)data;
		int len;

		/* Append the backing tap(4) unit to the status text */
		len = strlen(ifs->ascii);
		if (len < sizeof(ifs->ascii)) {
			if (sc->sc_tap_unit >= 0) {
				ksnprintf(ifs->ascii + len,
				    sizeof(ifs->ascii) - len,
				    "\tBacked by tap%d\n",
				    sc->sc_tap_unit);
			}
		}
		break;
	}
	case SIOCSIFADDR:
		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
			/*
			 * If we are explicitly requested to change address,
			 * we should invalidate address/netmask passed in
			 * from vkernel command line.
			 */
			sc->sc_addr = 0;
			sc->sc_mask = 0;
		}
		/* FALL THROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return error;
}
456
457 static int
vke_stop(struct vke_softc * sc)458 vke_stop(struct vke_softc *sc)
459 {
460 struct ifnet *ifp = &sc->arpcom.ac_if;
461 int i;
462
463 ASSERT_SERIALIZED(ifp->if_serializer);
464
465 ifp->if_flags &= ~IFF_RUNNING;
466 ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
467
468 if (sc) {
469 if (sc->cotd_tx) {
470 cothread_lock(sc->cotd_tx, 0);
471 if (sc->cotd_tx_exit == VKE_COTD_RUN)
472 sc->cotd_tx_exit = VKE_COTD_EXIT;
473 cothread_signal(sc->cotd_tx);
474 cothread_unlock(sc->cotd_tx, 0);
475 cothread_delete(&sc->cotd_tx);
476 }
477 if (sc->cotd_rx) {
478 cothread_lock(sc->cotd_rx, 0);
479 if (sc->cotd_rx_exit == VKE_COTD_RUN)
480 sc->cotd_rx_exit = VKE_COTD_EXIT;
481 cothread_signal(sc->cotd_rx);
482 cothread_unlock(sc->cotd_rx, 0);
483 cothread_delete(&sc->cotd_rx);
484 }
485
486 for (i = 0; i < sc->sc_ringsize; i++) {
487 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
488 m_freem(sc->sc_rxfifo->array[i]);
489 sc->sc_rxfifo->array[i] = NULL;
490 }
491 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
492 m_freem(sc->sc_txfifo->array[i]);
493 sc->sc_txfifo->array[i] = NULL;
494 }
495 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
496 m_freem(sc->sc_txfifo_done->array[i]);
497 sc->sc_txfifo_done->array[i] = NULL;
498 }
499 }
500
501 if (sc->sc_txfifo) {
502 if (sc->sc_txfifo->array)
503 kfree(sc->sc_txfifo->array, M_DEVBUF);
504 kfree(sc->sc_txfifo, M_DEVBUF);
505 sc->sc_txfifo = NULL;
506 }
507
508 if (sc->sc_txfifo_done) {
509 if (sc->sc_txfifo_done->array)
510 kfree(sc->sc_txfifo_done->array, M_DEVBUF);
511 kfree(sc->sc_txfifo_done, M_DEVBUF);
512 sc->sc_txfifo_done = NULL;
513 }
514
515 if (sc->sc_rxfifo) {
516 if (sc->sc_rxfifo->array)
517 kfree(sc->sc_rxfifo->array, M_DEVBUF);
518 kfree(sc->sc_rxfifo, M_DEVBUF);
519 sc->sc_rxfifo = NULL;
520 }
521 }
522
523
524 return 0;
525 }
526
527 /*
528 * vke_rx_intr() is the interrupt function for the receive cothread.
529 */
/*
 * vke_rx_intr() is the interrupt function for the receive cothread.
 *
 * Runs in kernel context: folds cothread statistics into the ifnet,
 * then feeds received mbufs from the rx fifo into the stack via
 * if_input(), replenishing each consumed slot with a fresh cluster.
 * If no replacement cluster can be allocated the packet is recycled
 * into the ring (i.e. dropped).
 */
static void
vke_rx_intr(cothread_t cotd)
{
	struct mbuf *m;
	struct mbuf *nm;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	/*
	 * NOTE(review): 'count' is static, so it is shared by all vke
	 * instances and carries over between invocations.  Looks
	 * questionable for multi-interface setups -- confirm intent.
	 */
	static int count = 0;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);

	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	/* Fold cothread-accumulated stats into the ifnet counters */
	if (sc->cotd_ipackets) {
		IFNET_STAT_INC(ifp, ipackets, 1);
		sc->cotd_ipackets = 0;
	}
	cothread_unlock(cotd, 0);

	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (nm) {
			/* Replacement available: pass m up the stack */
			vke_rxfifo_dequeue(sc, nm);
			ifp->if_input(ifp, m, NULL, -1);
			/* Re-signal the cothread in VKE_CHUNK batches */
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			/* No cluster: recycle m into the ring, drop pkt */
			vke_rxfifo_dequeue(sc, m);
		}
	}

	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
	ifnet_deserialize_all(ifp);
}
576
577 /*
578 * vke_tx_intr() is the interrupt function for the transmit cothread.
579 * Calls vke_start() to handle processing transmit mbufs.
580 */
581 static void
vke_tx_intr(cothread_t cotd)582 vke_tx_intr(cothread_t cotd)
583 {
584 struct vke_softc *sc = cotd->arg;
585 struct ifnet *ifp = &sc->arpcom.ac_if;
586 struct mbuf *m;
587
588 ifnet_serialize_all(ifp);
589 cothread_lock(cotd, 0);
590 if (sc->cotd_tx_exit != VKE_COTD_RUN) {
591 cothread_unlock(cotd, 0);
592 ifnet_deserialize_all(ifp);
593 return;
594 }
595 if (sc->cotd_opackets) {
596 IFNET_STAT_INC(ifp, opackets, 1);
597 sc->cotd_opackets = 0;
598 }
599 if (sc->cotd_oerrors) {
600 IFNET_STAT_INC(ifp, oerrors, 1);
601 sc->cotd_oerrors = 0;
602 }
603 cothread_unlock(cotd, 0);
604
605 /*
606 * Free TX mbufs that have been processed before starting new
607 * ones going to be pipeline friendly.
608 */
609 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
610 m_freem(m);
611 }
612
613 if ((ifp->if_flags & IFF_RUNNING) == 0)
614 if_devstart(ifp);
615
616 ifnet_deserialize_all(ifp);
617 }
618
619 /*
620 * vke_rx_thread() is the body of the receive cothread.
621 *
622 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
623 */
/*
 * vke_rx_thread() is the body of the receive cothread.
 *
 * Reads packets from the backing fd directly into the empty mbuf
 * clusters pre-loaded on the rx fifo and interrupts the kernel in
 * VKE_CHUNK batches.  Blocks in select(2) with a 0.5s timeout when
 * the fd has no data so the exit flag is still polled regularly.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_rx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	fifo_t fifo = sc->sc_rxfifo;
	fd_set fdset;
	struct timeval tv;
	int count;
	int n;
	int r;

	/* Select timeout cannot be infinite since we need to check for
	 * the exit flag sc->cotd_rx_exit.
	 */
	tv.tv_sec = 0;
	tv.tv_usec = 500000;

	FD_ZERO(&fdset);
	count = 0;

	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
		/*
		 * Wait for the RX FIFO to be loaded with
		 * empty mbufs.
		 */
		if (NETFIFOINDEX(fifo->windex + 1, sc) ==
		    NETFIFOINDEX(fifo->rindex, sc)) {
			usleep(20000);
			continue;
		}

		/*
		 * Load data into the rx fifo
		 */
		cpu_lfence();
		m = fifo->array[NETFIFOINDEX(fifo->windex, sc)];
		if (m == NULL) {
			fprintf(stderr,
			    VKE_DEVNAME "%d: NULL rxring mbuf\n",
			    sc->sc_unit);
			/*
			 * Deliberate NULL-pointer store: crash hard and
			 * immediately, the ring must never hold a NULL
			 * slot at this point.
			 */
			*(volatile int *)0 = 1;
		}
		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
		if (n > 0) {
			/* no mycpu in cothread */
			/*IFNET_STAT_INC(ifp, ipackets, 1);*/
			++sc->cotd_ipackets;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = m->m_len = n;
			/* Publish the filled mbuf before bumping windex */
			cpu_sfence();
			++fifo->windex;
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
		} else {
			/* Flush any batched packets before blocking */
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			FD_SET(sc->sc_fd, &fdset);
			r = select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv);
			if (r == -1) {
				fprintf(stderr,
				    VKE_DEVNAME "%d: select failed for "
				    "TAP device\n", sc->sc_unit);
				usleep(1000000);
			}
		}
	}
	/* Acknowledge the exit request so vke_stop() can proceed */
	cpu_sfence();
	sc->cotd_rx_exit = VKE_COTD_DEAD;
}
699
700 /*
701 * vke_tx_thread() is the body of the transmit cothread.
702 *
703 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
704 */
/*
 * vke_tx_thread() is the body of the transmit cothread.
 *
 * Dequeues mbufs from the tx fifo, flattens them into sc_txbuf and
 * writes them to the backing fd, then parks them on the done fifo for
 * the kernel to free.  Interrupts the kernel in VKE_CHUNK batches and
 * sleeps via cothread_wait() when the tx fifo is empty.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_tx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	/*struct ifnet *ifp = &sc->arpcom.ac_if;*/
	int count = 0;

	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
		/*
		 * Write outgoing packets to the TAP interface
		 */
		m = vke_txfifo_dequeue(sc);
		if (m) {
			/* Packets larger than MCLBYTES are skipped */
			if (m->m_pkthdr.len <= MCLBYTES) {
				m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
				sc->sc_txbuf_len = m->m_pkthdr.len;

				if (write(sc->sc_fd, sc->sc_txbuf,
				    sc->sc_txbuf_len) < 0) {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, oerrors, 1);*/
					++sc->cotd_oerrors;
				} else {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, opackets, 1);*/
					++sc->cotd_opackets;
				}
			}
			/* Interrupt the kernel in VKE_CHUNK batches */
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
			/* Hand the mbuf back to the kernel for freeing */
			vke_txfifo_done_enqueue(sc, m);
		} else {
			/* Flush any batched completions before sleeping */
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			/* Sleep until vke_start() signals new work */
			cothread_lock(cotd, 1);
			if (vke_txfifo_empty(sc))
				cothread_wait(cotd);
			cothread_unlock(cotd, 1);
		}
	}
	/* Acknowledge the exit request so vke_stop() can proceed */
	cpu_sfence();
	sc->cotd_tx_exit = VKE_COTD_DEAD;
}
753
/*
 * Register a single ethernet media word on the interface's media list.
 */
static void
vke_ifmedia_add(struct vke_softc *sc, int mword)
{
	ifmedia_add(&sc->sc_media, IFM_ETHER | mword, 0, NULL);
}
759
/*
 * Register a media word with the full-duplex flag set.
 */
static void
vke_ifmedia_addfdx(struct vke_softc *sc, int mword)
{
	vke_ifmedia_add(sc, mword | IFM_FDX);
}
765
766 static int
vke_attach(const struct vknetif_info * info,int unit)767 vke_attach(const struct vknetif_info *info, int unit)
768 {
769 struct vke_softc *sc;
770 struct ifnet *ifp;
771 struct tapinfo tapinfo;
772 uint8_t enaddr[ETHER_ADDR_LEN];
773 int nmbufs;
774 int fd;
775
776 KKASSERT(info->tap_fd >= 0);
777 fd = info->tap_fd;
778
779 if (info->enaddr) {
780 /*
781 * enaddr is supplied
782 */
783 bcopy(info->enaddr, enaddr, ETHER_ADDR_LEN);
784 } else {
785 /*
786 * This is only a TAP device if tap_unit is non-zero. If
787 * connecting to a virtual socket we generate a unique MAC.
788 *
789 * WARNING: enaddr[0] bit 0 is the multicast bit, when
790 * randomizing enaddr[] just leave the first
791 * two bytes 00 00 for now.
792 */
793 bzero(enaddr, sizeof(enaddr));
794 if (info->tap_unit >= 0) {
795 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
796 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
797 "failed: %s\n", unit, strerror(errno));
798 return ENXIO;
799 }
800
801 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
802 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
803 "failed: %s\n", unit, strerror(errno));
804 return ENXIO;
805 }
806 } else {
807 int fd = open("/dev/urandom", O_RDONLY);
808 if (fd >= 0) {
809 read(fd, enaddr + 2, 4);
810 close(fd);
811 }
812 enaddr[4] = (int)getpid() >> 8;
813 enaddr[5] = (int)getpid() & 255;
814
815 }
816 enaddr[1] += 1;
817 }
818 if (ETHER_IS_MULTICAST(enaddr)) {
819 kprintf(VKE_DEVNAME "%d: illegal MULTICAST ether mac!\n", unit);
820 return ENXIO;
821 }
822
823 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
824
825 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
826 sc->sc_fd = fd;
827 sc->sc_unit = unit;
828 sc->sc_tap_unit = info->tap_unit;
829 sc->sc_addr = info->netif_addr;
830 sc->sc_mask = info->netif_mask;
831
832 if (vke_max_ringsize == 0) {
833 nmbufs = nmbclusters / (NetifNum * 2);
834 sc->sc_ringsize = LOW_POW_2(nmbufs);
835 if (sc->sc_ringsize > VKE_DEFAULT_RINGSIZE)
836 sc->sc_ringsize = VKE_DEFAULT_RINGSIZE;
837 } else if (vke_max_ringsize >= VKE_CHUNK) { /* Tunable specified */
838 sc->sc_ringsize = LOW_POW_2(vke_max_ringsize);
839 } else {
840 sc->sc_ringsize = LOW_POW_2(VKE_CHUNK);
841 }
842
843 ifp = &sc->arpcom.ac_if;
844 if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
845
846 /* NB: after if_initname() */
847 sysctl_ctx_init(&sc->sc_sysctl_ctx);
848 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
849 SYSCTL_STATIC_CHILDREN(_hw),
850 OID_AUTO, ifp->if_xname,
851 CTLFLAG_RD, 0, "");
852 if (sc->sc_sysctl_tree == NULL) {
853 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
854 } else {
855 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
856 SYSCTL_CHILDREN(sc->sc_sysctl_tree),
857 OID_AUTO, "tap_unit",
858 CTLFLAG_RD, &sc->sc_tap_unit, 0,
859 "Backend tap(4) unit");
860 }
861
862 ifp->if_softc = sc;
863 ifp->if_ioctl = vke_ioctl;
864 ifp->if_start = vke_start;
865 ifp->if_init = vke_init;
866 ifp->if_mtu = tapinfo.mtu;
867 ifp->if_baudrate = tapinfo.baudrate;
868 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
869 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
870 ifq_set_ready(&ifp->if_snd);
871
872 ifmedia_init(&sc->sc_media, 0, vke_media_change, vke_media_status);
873 /* We support as many media types as we please for
874 debugging purposes */
875 vke_ifmedia_add(sc, IFM_10_T);
876 vke_ifmedia_add(sc, IFM_10_T);
877 vke_ifmedia_add(sc, IFM_10_2);
878 vke_ifmedia_add(sc, IFM_10_5);
879 vke_ifmedia_add(sc, IFM_100_TX);
880 vke_ifmedia_addfdx(sc, IFM_100_TX);
881 vke_ifmedia_add(sc, IFM_100_FX);
882 vke_ifmedia_add(sc, IFM_100_T4);
883 vke_ifmedia_add(sc, IFM_100_VG);
884 vke_ifmedia_add(sc, IFM_100_T2);
885 vke_ifmedia_addfdx(sc, IFM_1000_SX);
886 vke_ifmedia_add(sc, IFM_10_STP);
887 vke_ifmedia_add(sc, IFM_10_FL);
888 vke_ifmedia_addfdx(sc, IFM_1000_LX);
889 vke_ifmedia_addfdx(sc, IFM_1000_CX);
890 vke_ifmedia_addfdx(sc, IFM_1000_T);
891 vke_ifmedia_add(sc, IFM_HPNA_1);
892 vke_ifmedia_addfdx(sc, IFM_10G_LR);
893 vke_ifmedia_addfdx(sc, IFM_10G_SR);
894 vke_ifmedia_addfdx(sc, IFM_10G_CX4);
895 vke_ifmedia_addfdx(sc, IFM_2500_SX);
896 vke_ifmedia_addfdx(sc, IFM_10G_TWINAX);
897 vke_ifmedia_addfdx(sc, IFM_10G_TWINAX_LONG);
898 vke_ifmedia_addfdx(sc, IFM_10G_LRM);
899 vke_ifmedia_addfdx(sc, IFM_10G_T);
900 vke_ifmedia_addfdx(sc, IFM_40G_CR4);
901 vke_ifmedia_addfdx(sc, IFM_40G_SR4);
902 vke_ifmedia_addfdx(sc, IFM_40G_LR4);
903 vke_ifmedia_addfdx(sc, IFM_1000_KX);
904 vke_ifmedia_addfdx(sc, IFM_10G_KX4);
905 vke_ifmedia_addfdx(sc, IFM_10G_KR);
906 vke_ifmedia_addfdx(sc, IFM_10G_CR1);
907 vke_ifmedia_addfdx(sc, IFM_20G_KR2);
908 vke_ifmedia_addfdx(sc, IFM_2500_KX);
909 vke_ifmedia_addfdx(sc, IFM_2500_T);
910 vke_ifmedia_addfdx(sc, IFM_5000_T);
911 vke_ifmedia_addfdx(sc, IFM_50G_PCIE);
912 vke_ifmedia_addfdx(sc, IFM_25G_PCIE);
913 vke_ifmedia_addfdx(sc, IFM_1000_SGMII);
914 vke_ifmedia_addfdx(sc, IFM_10G_SFI);
915 vke_ifmedia_addfdx(sc, IFM_40G_XLPPI);
916 vke_ifmedia_addfdx(sc, IFM_1000_CX_SGMII);
917 vke_ifmedia_addfdx(sc, IFM_40G_KR4);
918 vke_ifmedia_addfdx(sc, IFM_10G_ER);
919 vke_ifmedia_addfdx(sc, IFM_100G_CR4);
920 vke_ifmedia_addfdx(sc, IFM_100G_SR4);
921 vke_ifmedia_addfdx(sc, IFM_100G_KR4);
922 vke_ifmedia_addfdx(sc, IFM_100G_LR4);
923 vke_ifmedia_addfdx(sc, IFM_56G_R4);
924 vke_ifmedia_addfdx(sc, IFM_100_T);
925 vke_ifmedia_addfdx(sc, IFM_25G_CR);
926 vke_ifmedia_addfdx(sc, IFM_25G_KR);
927 vke_ifmedia_addfdx(sc, IFM_25G_SR);
928 vke_ifmedia_addfdx(sc, IFM_50G_CR2);
929 vke_ifmedia_addfdx(sc, IFM_50G_KR2);
930 vke_ifmedia_add(sc, IFM_AUTO);
931
932 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
933
934 ifp->if_link_state = LINK_STATE_UP;
935
936 ether_ifattach(ifp, enaddr, NULL);
937
938 if (bootverbose && sc->sc_addr != 0) {
939 if_printf(ifp, "pre-configured "
940 "address 0x%08x, netmask 0x%08x, %d mbuf clusters\n",
941 ntohl(sc->sc_addr), ntohl(sc->sc_mask), sc->sc_ringsize);
942 }
943
944 return 0;
945 }
946
/*
 * Configure the pre-supplied (vkernel command line) IPv4 address and
 * netmask on the interface via in_control(SIOCAIFADDR).
 *
 * Called with the ifnet serializer held; the serializer is dropped
 * around in_control(), which re-acquires it internally.  Returns the
 * in_control() result.
 */
static int
vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
{
	struct ifaliasreq ifra;
	struct sockaddr_in *sin;
	int ret;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if (bootverbose) {
		if_printf(ifp, "add pre-configured "
		    "address 0x%08x, netmask 0x%08x\n",
		    ntohl(addr), ntohl(mask));
	}

	bzero(&ifra, sizeof(ifra));

	/* NB: no need to set ifaliasreq.ifra_name */

	sin = (struct sockaddr_in *)&ifra.ifra_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr.s_addr = addr;

	/* A zero mask means "let in_control pick"; skip ifra_mask */
	if (mask != 0) {
		sin = (struct sockaddr_in *)&ifra.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr.s_addr = mask;
	}

	/*
	 * Temporarily release serializer, in_control() will hold
	 * it again before calling ifnet.if_ioctl().
	 */
	ifnet_deserialize_all(ifp);
	ret = in_control(SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
	ifnet_serialize_all(ifp);

	return ret;
}
987
/*
 * Media change requests are accepted but have no effect on this
 * pseudo device; the argument is ignored.
 */
static int
vke_media_change(struct ifnet *ifp)
{
	return 0;
}
993
vke_media_status(struct ifnet * ifp,struct ifmediareq * imr)994 static void vke_media_status(struct ifnet *ifp, struct ifmediareq *imr)
995 {
996 struct vke_softc *sc = (struct vke_softc *)ifp->if_softc;
997
998 imr->ifm_status = IFM_AVALID;
999 imr->ifm_status |= IFM_ACTIVE;
1000
1001 if(sc->sc_media.ifm_cur) {
1002 if(sc->sc_media.ifm_cur->ifm_media == IFM_ETHER) {
1003 imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
1004 } else {
1005 imr->ifm_active = sc->sc_media.ifm_cur->ifm_media;
1006 }
1007 } else {
1008 imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
1009 }
1010 }
1011