/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Sepherosa Ziehau <sepherosa@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>

#include <machine/md_var.h>
#include <machine/cothread.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/bpf.h>
#include <net/if_arp.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/vlan/if_vlan_ether.h>

#include <netinet/in_var.h>

#include <sys/stat.h>
#include <net/tap/if_tap.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define VKE_DEVNAME	"vke"

#define VKE_CHUNK	8	/* number of mbufs to queue before interrupting */

#define NETFIFOINDEX(u, sc)	((u) & ((sc)->sc_ringsize - 1))

#define VKE_COTD_RUN	0
#define VKE_COTD_EXIT	1
#define VKE_COTD_DEAD	2

struct vke_fifo {
	struct mbuf	**array;
	int		rindex;
	int		windex;
};
typedef struct vke_fifo *fifo_t;

/* Default ring size; this has been the default for a long time. */
#define VKE_DEFAULT_RINGSIZE	256
static int vke_max_ringsize = 0;
TUNABLE_INT("hw.vke.max_ringsize", &vke_max_ringsize);

#define LOW_POW_2(n)	(1 << (fls(n) - 1))
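
/*
 * Implementation note: the fifo code below relies on power-of-two
 * arithmetic.  LOW_POW_2() rounds a count down to the nearest power
 * of 2 and NETFIFOINDEX() wraps a monotonically increasing index by
 * masking, which only works when sc_ringsize is a power of 2.  For
 * example, with sc_ringsize == 256 (mask 0xff):
 *
 *	LOW_POW_2(300)		-> 256
 *	NETFIFOINDEX(255, sc)	-> 255
 *	NETFIFOINDEX(256, sc)	-> 0	(wraps to the first slot)
 *
 * rindex/windex themselves are never reduced; only the macro masks
 * them down to a slot number.
 */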
struct vke_softc {
	struct arpcom		arpcom;
	int			sc_fd;
	int			sc_unit;

	cothread_t		cotd_tx;
	cothread_t		cotd_rx;

	int			cotd_tx_exit;
	int			cotd_rx_exit;

	void			*sc_txbuf;
	int			sc_txbuf_len;

	fifo_t			sc_txfifo;
	fifo_t			sc_txfifo_done;
	fifo_t			sc_rxfifo;

	int			sc_ringsize;

	long			cotd_ipackets;
	long			cotd_oerrors;
	long			cotd_opackets;

	struct sysctl_ctx_list	sc_sysctl_ctx;
	struct sysctl_oid	*sc_sysctl_tree;

	int			sc_tap_unit;	/* unit of backend tap(4) */
	in_addr_t		sc_addr;	/* address */
	in_addr_t		sc_mask;	/* netmask */

	struct ifmedia		sc_media;
};

static void	vke_start(struct ifnet *, struct ifaltq_subque *);
static void	vke_init(void *);
static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);

static int	vke_media_change(struct ifnet *);
static void	vke_media_status(struct ifnet *, struct ifmediareq *);

static int	vke_attach(const struct vknetif_info *, int);
static int	vke_stop(struct vke_softc *);
static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
static void	vke_tx_intr(cothread_t cotd);
static void	vke_tx_thread(cothread_t cotd);
static void	vke_rx_intr(cothread_t cotd);
static void	vke_rx_thread(cothread_t cotd);

static int	vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);

static int	vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_done_dequeue(struct vke_softc *sc,
		    struct mbuf *nm);

static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);

static void
vke_sysinit(void *arg __unused)
{
	int i, unit;

	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));

	unit = 0;
	for (i = 0; i < NetifNum; ++i) {
		if (vke_attach(&NetifInfo[i], unit) == 0)
			++unit;
	}
}
SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
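
/*
 * Implementation note: each fifo is a single-producer/single-consumer
 * ring shared by the kernel and one cothread, so no lock protects it.
 * The protocol is, roughly:
 *
 *	producer			consumer
 *	--------			--------
 *	array[WIDX] = m;		if (RIDX == WIDX) empty;
 *	cpu_sfence();			cpu_lfence();
 *	++windex;			m = array[RIDX];
 *					++rindex;
 *
 * The store fence publishes the mbuf pointer before the index update
 * becomes visible; the load fence keeps the consumer from reading a
 * slot before it observes the matching windex update.
 */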
/*
 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.  Since
 * the cothread cannot free transmit mbufs after processing we put them on
 * the done fifo so the kernel can free them.
 */
static int
vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo_done;

	/* Busy-wait while the done fifo is full; the kernel drains it */
	while (NETFIFOINDEX(fifo->windex + 1, sc) ==
	       NETFIFOINDEX(fifo->rindex, sc)) {
		usleep(20000);
	}
	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
 */
static struct mbuf *
vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
{
	fifo_t fifo = sc->sc_txfifo_done;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = nm;
	++fifo->rindex;

	return (m);
}

/*
 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.  Returns -1
 * if the fifo is full.
 */
static int
vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->windex + 1, sc) ==
	    NETFIFOINDEX(fifo->rindex, sc)) {
		return (-1);
	}

	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_dequeue() - Return the next mbuf on the transmit fifo if
 * one exists.
 */
static struct mbuf *
vke_txfifo_dequeue(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = NULL;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}

static int
vke_txfifo_empty(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (1);
	return (0);
}

/*
 * vke_rxfifo_dequeue() - Return the next mbuf on the receive fifo if one
 * exists, replacing it with newm, which should point to a newly allocated
 * mbuf.
 */
static struct mbuf *
vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = newm;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}

/*
 * Return the next mbuf if available but do NOT remove it from the FIFO.
 */
static struct mbuf *
vke_rxfifo_sniff(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];

	return (m);
}

static void
vke_init(void *xsc)
{
	struct vke_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	size_t ringsize = sc->sc_ringsize * sizeof(struct mbuf *);
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	vke_stop(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	/*
	 * Allocate memory for FIFO structures and mbufs.
	 */
	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* Pre-load the RX ring with empty mbuf clusters */
	for (i = 0; i < sc->sc_ringsize; i++) {
		sc->sc_rxfifo->array[i] = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
		sc->sc_txfifo->array[i] = NULL;
		sc->sc_txfifo_done->array[i] = NULL;
	}

	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");

	if (sc->sc_addr != 0) {
		in_addr_t addr, mask;

		addr = sc->sc_addr;
		mask = sc->sc_mask;

		/*
		 * Make sure the address assigned by the vkernel will
		 * not be added again.
		 */
		sc->sc_addr = 0;
		sc->sc_mask = 0;

		vke_init_addr(ifp, addr, mask);
	}
}
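
/*
 * Implementation note: vke_init() pre-loads the RX ring with empty
 * mbuf clusters.  The RX cothread reads packet data directly into the
 * cluster at windex, while the kernel side swaps a freshly allocated
 * cluster into each slot it consumes (vke_rxfifo_dequeue()), so the
 * cothread never allocates mbufs itself -- it cannot, since it runs
 * outside the kernel's per-cpu environment.
 */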
/*
 * Called from kernel.
 *
 * NOTE: We can't make any kernel callbacks while holding the cothread
 *	 lock because the cothread lock is not governed by the kernel
 *	 scheduler (so mplock, tokens, etc will not be released).
 */
static void
vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct vke_softc *sc = ifp->if_softc;
	struct mbuf *m;
	cothread_t cotd = sc->cotd_tx;
	int count;

	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ASSERT_SERIALIZED(ifp->if_serializer);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	count = 0;
	while ((m = ifsq_dequeue(ifsq)) != NULL) {
		if (vke_txfifo_enqueue(sc, m) != -1) {
			ETHER_BPF_MTAP(ifp, m);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			/* TX fifo full, drop the packet */
			m_freem(m);
		}
	}
	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
}
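
/*
 * Implementation note: signaling a cothread is comparatively expensive
 * (it takes the cothread lock and wakes a host thread), so vke_start()
 * batches roughly VKE_CHUNK packets per signal instead of signaling
 * for every packet, with one final signal for any partial batch.
 */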
static int
vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct vke_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vke_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vke_stop(sc);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFSTATUS: {
		struct ifstat *ifs = (struct ifstat *)data;
		int len;

		len = strlen(ifs->ascii);
		if (len < sizeof(ifs->ascii)) {
			if (sc->sc_tap_unit >= 0) {
				ksnprintf(ifs->ascii + len,
				    sizeof(ifs->ascii) - len,
				    "\tBacked by tap%d\n",
				    sc->sc_tap_unit);
			}
		}
		break;
	}
	case SIOCSIFADDR:
		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
			/*
			 * If we are explicitly requested to change the
			 * address, we should invalidate the address/netmask
			 * passed in from the vkernel command line.
			 */
			sc->sc_addr = 0;
			sc->sc_mask = 0;
		}
		/* FALL THROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return error;
}

static int
vke_stop(struct vke_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	ifp->if_flags &= ~IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	if (sc) {
		if (sc->cotd_tx) {
			cothread_lock(sc->cotd_tx, 0);
			if (sc->cotd_tx_exit == VKE_COTD_RUN)
				sc->cotd_tx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_tx);
			cothread_unlock(sc->cotd_tx, 0);
			cothread_delete(&sc->cotd_tx);
		}
		if (sc->cotd_rx) {
			cothread_lock(sc->cotd_rx, 0);
			if (sc->cotd_rx_exit == VKE_COTD_RUN)
				sc->cotd_rx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_rx);
			cothread_unlock(sc->cotd_rx, 0);
			cothread_delete(&sc->cotd_rx);
		}

		for (i = 0; i < sc->sc_ringsize; i++) {
			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
				m_freem(sc->sc_rxfifo->array[i]);
				sc->sc_rxfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
				m_freem(sc->sc_txfifo->array[i]);
				sc->sc_txfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo_done &&
			    sc->sc_txfifo_done->array[i]) {
				m_freem(sc->sc_txfifo_done->array[i]);
				sc->sc_txfifo_done->array[i] = NULL;
			}
		}

		if (sc->sc_txfifo) {
			if (sc->sc_txfifo->array)
				kfree(sc->sc_txfifo->array, M_DEVBUF);
			kfree(sc->sc_txfifo, M_DEVBUF);
			sc->sc_txfifo = NULL;
		}

		if (sc->sc_txfifo_done) {
			if (sc->sc_txfifo_done->array)
				kfree(sc->sc_txfifo_done->array, M_DEVBUF);
			kfree(sc->sc_txfifo_done, M_DEVBUF);
			sc->sc_txfifo_done = NULL;
		}

		if (sc->sc_rxfifo) {
			if (sc->sc_rxfifo->array)
				kfree(sc->sc_rxfifo->array, M_DEVBUF);
			kfree(sc->sc_rxfifo, M_DEVBUF);
			sc->sc_rxfifo = NULL;
		}
	}

	return 0;
}

/*
 * vke_rx_intr() is the interrupt function for the receive cothread.
 */
static void
vke_rx_intr(cothread_t cotd)
{
	struct mbuf *m;
	struct mbuf *nm;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	static int count = 0;	/* protected by the ifnet serializer */

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);

	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_ipackets) {
		/* Fold the count accumulated by the cothread into the stats */
		IFNET_STAT_INC(ifp, ipackets, sc->cotd_ipackets);
		sc->cotd_ipackets = 0;
	}
	cothread_unlock(cotd, 0);

	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (nm) {
			vke_rxfifo_dequeue(sc, nm);
			ifp->if_input(ifp, m, NULL, -1);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			/* Allocation failed, recycle the mbuf (drop) */
			vke_rxfifo_dequeue(sc, m);
		}
	}

	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
	ifnet_deserialize_all(ifp);
}
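
/*
 * Implementation note: the *_intr() functions run in kernel context
 * when the corresponding cothread calls cothread_intr(), while the
 * *_thread() functions below run entirely outside the kernel proper.
 * All ifnet work (if_input(), statistics, if_devstart()) therefore
 * happens on the intr side, under ifnet_serialize_all().
 */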
/*
 * vke_tx_intr() is the interrupt function for the transmit cothread.
 * It calls vke_start(), via if_devstart(), to continue processing
 * transmit mbufs.
 */
static void
vke_tx_intr(cothread_t cotd)
{
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct mbuf *m;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);
	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_opackets) {
		/* Fold the counts accumulated by the cothread into the stats */
		IFNET_STAT_INC(ifp, opackets, sc->cotd_opackets);
		sc->cotd_opackets = 0;
	}
	if (sc->cotd_oerrors) {
		IFNET_STAT_INC(ifp, oerrors, sc->cotd_oerrors);
		sc->cotd_oerrors = 0;
	}
	cothread_unlock(cotd, 0);

	/*
	 * Free TX mbufs that have already been processed before starting
	 * new ones, to stay pipeline friendly.
	 */
	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
		m_freem(m);
	}

	if (ifp->if_flags & IFF_RUNNING)
		if_devstart(ifp);

	ifnet_deserialize_all(ifp);
}

/*
 * vke_rx_thread() is the body of the receive cothread.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_rx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	fifo_t fifo = sc->sc_rxfifo;
	fd_set fdset;
	struct timeval tv;
	int count;
	int n;
	int r;

	/*
	 * The select timeout cannot be infinite since we need to check
	 * the exit flag sc->cotd_rx_exit periodically.
	 */
	tv.tv_sec = 0;
	tv.tv_usec = 500000;

	FD_ZERO(&fdset);
	count = 0;

	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
		/*
		 * Wait for the RX FIFO to be loaded with
		 * empty mbufs.
		 */
		if (NETFIFOINDEX(fifo->windex + 1, sc) ==
		    NETFIFOINDEX(fifo->rindex, sc)) {
			usleep(20000);
			continue;
		}

		/*
		 * Load data into the rx fifo
		 */
		cpu_lfence();
		m = fifo->array[NETFIFOINDEX(fifo->windex, sc)];
		if (m == NULL) {
			fprintf(stderr,
				VKE_DEVNAME "%d: NULL rxring mbuf\n",
				sc->sc_unit);
			/* The ring is corrupt; force a crash */
			*(volatile int *)0 = 1;
		}
		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
		if (n > 0) {
			/* no mycpu in cothread */
			/*IFNET_STAT_INC(ifp, ipackets, 1);*/
			++sc->cotd_ipackets;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = m->m_len = n;
			cpu_sfence();
			++fifo->windex;
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			FD_SET(sc->sc_fd, &fdset);
			r = select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv);
			if (r == -1) {
				fprintf(stderr,
					VKE_DEVNAME "%d: select failed for "
					"TAP device\n", sc->sc_unit);
				usleep(1000000);
			}
		}
	}
	cpu_sfence();
	sc->cotd_rx_exit = VKE_COTD_DEAD;
}
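
/*
 * Implementation note: because cothreads have no per-cpu globaldata,
 * IFNET_STAT_INC() (which uses mycpu) cannot be called from thread
 * context.  The threads instead accumulate counts in the softc
 * (cotd_ipackets, cotd_opackets, cotd_oerrors) and the intr functions
 * fold those into the ifnet statistics from kernel context.
 */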
/*
 * vke_tx_thread() is the body of the transmit cothread.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_tx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	/*struct ifnet *ifp = &sc->arpcom.ac_if;*/
	int count = 0;

	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
		/*
		 * Write outgoing packets to the TAP interface
		 */
		m = vke_txfifo_dequeue(sc);
		if (m) {
			if (m->m_pkthdr.len <= MCLBYTES) {
				m_copydata(m, 0, m->m_pkthdr.len,
				    sc->sc_txbuf);
				sc->sc_txbuf_len = m->m_pkthdr.len;

				if (write(sc->sc_fd, sc->sc_txbuf,
					  sc->sc_txbuf_len) < 0) {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, oerrors, 1);*/
					++sc->cotd_oerrors;
				} else {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, opackets, 1);*/
					++sc->cotd_opackets;
				}
			}
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
			vke_txfifo_done_enqueue(sc, m);
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			cothread_lock(cotd, 1);
			if (vke_txfifo_empty(sc))
				cothread_wait(cotd);
			cothread_unlock(cotd, 1);
		}
	}
	cpu_sfence();
	sc->cotd_tx_exit = VKE_COTD_DEAD;
}
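
/*
 * Implementation note: the TX cothread stages each frame into the
 * contiguous sc_txbuf via m_copydata() because write(2) on the tap
 * descriptor needs one flat buffer while an mbuf chain may be
 * fragmented.  Frames larger than MCLBYTES are silently discarded,
 * but even those mbufs flow through the done fifo so the kernel can
 * free them.
 */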
static void
vke_ifmedia_add(struct vke_softc *sc, int mword)
{
	ifmedia_add(&sc->sc_media, IFM_ETHER | mword, 0, NULL);
}

static void
vke_ifmedia_addfdx(struct vke_softc *sc, int mword)
{
	vke_ifmedia_add(sc, mword | IFM_FDX);
}

static int
vke_attach(const struct vknetif_info *info, int unit)
{
	struct vke_softc *sc;
	struct ifnet *ifp;
	struct tapinfo tapinfo;
	uint8_t enaddr[ETHER_ADDR_LEN];
	int nmbufs;
	int fd;

	KKASSERT(info->tap_fd >= 0);
	fd = info->tap_fd;

	/*
	 * Default MTU/baudrate, in case the backend is not a tap(4)
	 * device and TAPGIFINFO is never issued below.
	 */
	bzero(&tapinfo, sizeof(tapinfo));
	tapinfo.mtu = ETHERMTU;

	if (info->enaddr) {
		/*
		 * enaddr is supplied
		 */
		bcopy(info->enaddr, enaddr, ETHER_ADDR_LEN);
	} else {
		/*
		 * This is only a TAP device if tap_unit is non-zero.  If
		 * connecting to a virtual socket we generate a unique MAC.
		 *
		 * WARNING: enaddr[0] bit 0 is the multicast bit, when
		 *	    randomizing enaddr[] just leave the first
		 *	    two bytes 00 00 for now.
		 */
		bzero(enaddr, sizeof(enaddr));
		if (info->tap_unit >= 0) {
			if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
					"failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}

			if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
					"failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}
		} else {
			int urandom_fd = open("/dev/urandom", O_RDONLY);

			if (urandom_fd >= 0) {
				read(urandom_fd, enaddr + 2, 4);
				close(urandom_fd);
			}
			enaddr[4] = (int)getpid() >> 8;
			enaddr[5] = (int)getpid() & 255;
		}
		/* Nudge the address so it cannot collide with the backend's */
		enaddr[1] += 1;
	}
	if (ETHER_IS_MULTICAST(enaddr)) {
		kprintf(VKE_DEVNAME "%d: illegal MULTICAST ether mac!\n", unit);
		return ENXIO;
	}

	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
	sc->sc_fd = fd;
	sc->sc_unit = unit;
	sc->sc_tap_unit = info->tap_unit;
	sc->sc_addr = info->netif_addr;
	sc->sc_mask = info->netif_mask;

	if (vke_max_ringsize == 0) {
		nmbufs = nmbclusters / (NetifNum * 2);
		sc->sc_ringsize = LOW_POW_2(nmbufs);
		if (sc->sc_ringsize > VKE_DEFAULT_RINGSIZE)
			sc->sc_ringsize = VKE_DEFAULT_RINGSIZE;
	} else if (vke_max_ringsize >= VKE_CHUNK) {	/* Tunable specified */
		sc->sc_ringsize = LOW_POW_2(vke_max_ringsize);
	} else {
		sc->sc_ringsize = LOW_POW_2(VKE_CHUNK);
	}
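
	/*
	 * Implementation note: a worked example of the ring sizing
	 * above, assuming (hypothetically) nmbclusters == 16384 and
	 * NetifNum == 2:
	 *
	 *	nmbufs            = 16384 / (2 * 2) = 4096
	 *	LOW_POW_2(nmbufs)                   = 4096
	 *	capped to VKE_DEFAULT_RINGSIZE      = 256
	 *
	 * so each interface normally gets 256-slot rings unless the
	 * hw.vke.max_ringsize tunable overrides the computation.
	 */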
	ifp = &sc->arpcom.ac_if;
	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);

	/* NB: after if_initname() */
	sysctl_ctx_init(&sc->sc_sysctl_ctx);
	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO, ifp->if_xname,
	    CTLFLAG_RD, 0, "");
	if (sc->sc_sysctl_tree == NULL) {
		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
	} else {
		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sc_sysctl_tree),
		    OID_AUTO, "tap_unit",
		    CTLFLAG_RD, &sc->sc_tap_unit, 0,
		    "Backend tap(4) unit");
	}

	ifp->if_softc = sc;
	ifp->if_ioctl = vke_ioctl;
	ifp->if_start = vke_start;
	ifp->if_init = vke_init;
	ifp->if_mtu = tapinfo.mtu;
	ifp->if_baudrate = tapinfo.baudrate;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifq_set_ready(&ifp->if_snd);

	ifmedia_init(&sc->sc_media, 0, vke_media_change, vke_media_status);
	/*
	 * We support as many media types as we please for
	 * debugging purposes.
	 */
	vke_ifmedia_add(sc, IFM_10_T);
	vke_ifmedia_addfdx(sc, IFM_10_T);
	vke_ifmedia_add(sc, IFM_10_2);
	vke_ifmedia_add(sc, IFM_10_5);
	vke_ifmedia_add(sc, IFM_100_TX);
	vke_ifmedia_addfdx(sc, IFM_100_TX);
	vke_ifmedia_add(sc, IFM_100_FX);
	vke_ifmedia_add(sc, IFM_100_T4);
	vke_ifmedia_add(sc, IFM_100_VG);
	vke_ifmedia_add(sc, IFM_100_T2);
	vke_ifmedia_addfdx(sc, IFM_1000_SX);
	vke_ifmedia_add(sc, IFM_10_STP);
	vke_ifmedia_add(sc, IFM_10_FL);
	vke_ifmedia_addfdx(sc, IFM_1000_LX);
	vke_ifmedia_addfdx(sc, IFM_1000_CX);
	vke_ifmedia_addfdx(sc, IFM_1000_T);
	vke_ifmedia_add(sc, IFM_HPNA_1);
	vke_ifmedia_addfdx(sc, IFM_10G_LR);
	vke_ifmedia_addfdx(sc, IFM_10G_SR);
	vke_ifmedia_addfdx(sc, IFM_10G_CX4);
	vke_ifmedia_addfdx(sc, IFM_2500_SX);
	vke_ifmedia_addfdx(sc, IFM_10G_TWINAX);
	vke_ifmedia_addfdx(sc, IFM_10G_TWINAX_LONG);
	vke_ifmedia_addfdx(sc, IFM_10G_LRM);
	vke_ifmedia_addfdx(sc, IFM_10G_T);
	vke_ifmedia_addfdx(sc, IFM_40G_CR4);
	vke_ifmedia_addfdx(sc, IFM_40G_SR4);
	vke_ifmedia_addfdx(sc, IFM_40G_LR4);
	vke_ifmedia_addfdx(sc, IFM_1000_KX);
	vke_ifmedia_addfdx(sc, IFM_10G_KX4);
	vke_ifmedia_addfdx(sc, IFM_10G_KR);
	vke_ifmedia_addfdx(sc, IFM_10G_CR1);
	vke_ifmedia_addfdx(sc, IFM_20G_KR2);
	vke_ifmedia_addfdx(sc, IFM_2500_KX);
	vke_ifmedia_addfdx(sc, IFM_2500_T);
	vke_ifmedia_addfdx(sc, IFM_5000_T);
	vke_ifmedia_addfdx(sc, IFM_50G_PCIE);
	vke_ifmedia_addfdx(sc, IFM_25G_PCIE);
	vke_ifmedia_addfdx(sc, IFM_1000_SGMII);
	vke_ifmedia_addfdx(sc, IFM_10G_SFI);
	vke_ifmedia_addfdx(sc, IFM_40G_XLPPI);
	vke_ifmedia_addfdx(sc, IFM_1000_CX_SGMII);
	vke_ifmedia_addfdx(sc, IFM_40G_KR4);
	vke_ifmedia_addfdx(sc, IFM_10G_ER);
	vke_ifmedia_addfdx(sc, IFM_100G_CR4);
	vke_ifmedia_addfdx(sc, IFM_100G_SR4);
	vke_ifmedia_addfdx(sc, IFM_100G_KR4);
	vke_ifmedia_addfdx(sc, IFM_100G_LR4);
	vke_ifmedia_addfdx(sc, IFM_56G_R4);
	vke_ifmedia_addfdx(sc, IFM_100_T);
	vke_ifmedia_addfdx(sc, IFM_25G_CR);
	vke_ifmedia_addfdx(sc, IFM_25G_KR);
	vke_ifmedia_addfdx(sc, IFM_25G_SR);
	vke_ifmedia_addfdx(sc, IFM_50G_CR2);
	vke_ifmedia_addfdx(sc, IFM_50G_KR2);
	vke_ifmedia_add(sc, IFM_AUTO);

	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	ifp->if_link_state = LINK_STATE_UP;

	ether_ifattach(ifp, enaddr, NULL);

	if (bootverbose && sc->sc_addr != 0) {
		if_printf(ifp, "pre-configured "
		    "address 0x%08x, netmask 0x%08x, %d mbuf clusters\n",
		    ntohl(sc->sc_addr), ntohl(sc->sc_mask), sc->sc_ringsize);
	}

	return 0;
}

static int
vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
{
	struct ifaliasreq ifra;
	struct sockaddr_in *sin;
	int ret;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if (bootverbose) {
		if_printf(ifp, "add pre-configured "
		    "address 0x%08x, netmask 0x%08x\n",
		    ntohl(addr), ntohl(mask));
	}

	bzero(&ifra, sizeof(ifra));

	/* NB: no need to set ifaliasreq.ifra_name */

	sin = (struct sockaddr_in *)&ifra.ifra_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr.s_addr = addr;

	if (mask != 0) {
		sin = (struct sockaddr_in *)&ifra.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr.s_addr = mask;
	}

	/*
	 * Temporarily release the serializer; in_control() will hold
	 * it again before calling ifnet.if_ioctl().
	 */
	ifnet_deserialize_all(ifp);
	ret = in_control(SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
	ifnet_serialize_all(ifp);

	return ret;
}

static int
vke_media_change(struct ifnet *ifp)
{
	/* ignored */
	return (0);
}

static void
vke_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct vke_softc *sc = (struct vke_softc *)ifp->if_softc;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_status |= IFM_ACTIVE;

	if (sc->sc_media.ifm_cur) {
		if (sc->sc_media.ifm_cur->ifm_media == IFM_ETHER) {
			/*
			 * Plain IFM_ETHER is what ifmedia stores for the
			 * autoselect entry (IFM_AUTO is 0); report a fake
			 * 1000baseT-FDX link in that case.
			 */
			imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
		} else {
			imr->ifm_active = sc->sc_media.ifm_cur->ifm_media;
		}
	} else {
		imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
	}
}