/*
 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Sepherosa Ziehau <sepherosa@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>

#include <machine/md_var.h>
#include <machine/cothread.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/bpf.h>
#include <net/if_arp.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/vlan/if_vlan_ether.h>

#include <netinet/in_var.h>

#include <sys/stat.h>
#include <net/tap/if_tap.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define VKE_DEVNAME		"vke"

#define VKE_CHUNK	8	/* number of mbufs to queue before interrupting */

#define NETFIFOINDEX(u, sc)	((u) & ((sc)->sc_ringsize - 1))

#define VKE_COTD_RUN	0
#define VKE_COTD_EXIT	1
#define VKE_COTD_DEAD	2

struct vke_fifo {
	struct mbuf	**array;
	int		rindex;
	int		windex;
};
typedef struct vke_fifo *fifo_t;

/* Default value for a long time */
#define VKE_DEFAULT_RINGSIZE	256
static int vke_max_ringsize = 0;
TUNABLE_INT("hw.vke.max_ringsize", &vke_max_ringsize);

#define LOW_POW_2(n)	(1 << (fls(n) - 1))
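
/*
 * Note on the fifos: each vke_fifo is a single-producer/single-consumer
 * ring.  The producer stores the mbuf pointer, issues cpu_sfence() and
 * then advances windex; the consumer issues cpu_lfence() before loading.
 * The indices are free-running and masked by NETFIFOINDEX(), so the ring
 * size must be a power of two, and one slot is left unused to tell a
 * full ring apart from an empty one.
 */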

struct vke_softc {
	struct arpcom		arpcom;
	int			sc_fd;
	int			sc_unit;

	cothread_t		cotd_tx;
	cothread_t		cotd_rx;

	int			cotd_tx_exit;
	int			cotd_rx_exit;

	void			*sc_txbuf;
	int			sc_txbuf_len;

	fifo_t			sc_txfifo;
	fifo_t			sc_txfifo_done;
	fifo_t			sc_rxfifo;

	int			sc_ringsize;

	long			cotd_ipackets;
	long			cotd_oerrors;
	long			cotd_opackets;

	struct sysctl_ctx_list	sc_sysctl_ctx;
	struct sysctl_oid	*sc_sysctl_tree;

	int			sc_tap_unit;	/* unit of backend tap(4) */
	in_addr_t		sc_addr;	/* address */
	in_addr_t		sc_mask;	/* netmask */

	struct ifmedia		sc_media;
};

static void	vke_start(struct ifnet *, struct ifaltq_subque *);
static void	vke_init(void *);
static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);

static int	vke_media_change(struct ifnet *);
static void	vke_media_status(struct ifnet *, struct ifmediareq *);

static int	vke_attach(const struct vknetif_info *, int);
static int	vke_stop(struct vke_softc *);
static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
static void	vke_tx_intr(cothread_t cotd);
static void	vke_tx_thread(cothread_t cotd);
static void	vke_rx_intr(cothread_t cotd);
static void	vke_rx_thread(cothread_t cotd);

static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);

static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);

static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);

static void
vke_sysinit(void *arg __unused)
{
	int i, unit;

	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));

	unit = 0;
	for (i = 0; i < NetifNum; ++i) {
		if (vke_attach(&NetifInfo[i], unit) == 0)
			++unit;
	}
}
SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);

/*
 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.  Since
 * the cothread cannot free transmit mbufs after processing we put them on
 * the done fifo so the kernel can free them.
 */
static int
vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo_done;

	while (NETFIFOINDEX(fifo->windex + 1, sc) ==
	       NETFIFOINDEX(fifo->rindex, sc)) {
		usleep(20000);
	}
	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
 */
static struct mbuf *
vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
{
	fifo_t fifo = sc->sc_txfifo_done;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = nm;
	++fifo->rindex;

	return (m);
}

/*
 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
 */
static int
vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->windex + 1, sc) ==
	    NETFIFOINDEX(fifo->rindex, sc)) {
		return (-1);
	}

	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
 * exists.
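 *
 * The dequeued slot is cleared to NULL, so once an mbuf is returned it is
 * owned exclusively by the caller (the tx cothread), which hands it back
 * through the done fifo when the write has completed.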
 */
static struct mbuf *
vke_txfifo_dequeue(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = NULL;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}

static int
vke_txfifo_empty(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (1);
	return (0);
}

/*
 * vke_rxfifo_dequeue() - Return next mbuf on the receive fifo if one
 * exists, replacing it with newm which should point to a newly allocated
 * mbuf.
 */
static struct mbuf *
vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = newm;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}

/*
 * Return the next mbuf if available but do NOT remove it from the FIFO.
 */
static struct mbuf *
vke_rxfifo_sniff(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];

	return (m);
}

static void
vke_init(void *xsc)
{
	struct vke_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	size_t ringsize = sc->sc_ringsize * sizeof(struct mbuf *);
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	vke_stop(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	/*
	 * Allocate memory for FIFO structures and mbufs.
	 */
	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo->array = kmalloc(ringsize,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	for (i = 0; i < sc->sc_ringsize; i++) {
		sc->sc_rxfifo->array[i] = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
		sc->sc_txfifo->array[i] = NULL;
		sc->sc_txfifo_done->array[i] = NULL;
	}

	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");

	if (sc->sc_addr != 0) {
		in_addr_t addr, mask;

		addr = sc->sc_addr;
		mask = sc->sc_mask;

		/*
		 * Make sure vkernel assigned
		 * address will not be added
		 * again.
		 */
		sc->sc_addr = 0;
		sc->sc_mask = 0;

		vke_init_addr(ifp, addr, mask);
	}
}

/*
 * Called from kernel.
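 *
 * Packets are pushed onto the tx fifo and the tx cothread is signalled
 * every VKE_CHUNK packets (plus once for any remainder) to limit the
 * number of wakeups.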
 *
 * NOTE: We can't make any kernel callbacks while holding cothread lock
 *	 because the cothread lock is not governed by the kernel scheduler
 *	 (so mplock, tokens, etc will not be released).
 */
static void
vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct vke_softc *sc = ifp->if_softc;
	struct mbuf *m;
	cothread_t cotd = sc->cotd_tx;
	int count;

	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ASSERT_SERIALIZED(ifp->if_serializer);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	count = 0;
	while ((m = ifsq_dequeue(ifsq)) != NULL) {
		if (vke_txfifo_enqueue(sc, m) != -1) {
			ETHER_BPF_MTAP(ifp, m);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			m_freem(m);
		}
	}
	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
}

static int
vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct vke_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vke_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vke_stop(sc);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFSTATUS: {
		struct ifstat *ifs = (struct ifstat *)data;
		int len;

		len = strlen(ifs->ascii);
		if (len < sizeof(ifs->ascii)) {
			if (sc->sc_tap_unit >= 0) {
				ksnprintf(ifs->ascii + len,
				    sizeof(ifs->ascii) - len,
				    "\tBacked by tap%d\n",
				    sc->sc_tap_unit);
			}
		}
		break;
	}
	case SIOCSIFADDR:
		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
			/*
			 * If we are explicitly requested to change address,
			 * we should invalidate address/netmask passed in
			 * from vkernel command line.
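			 *
			 * The request still falls through to ether_ioctl()
			 * below, which performs the actual assignment.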
			 */
			sc->sc_addr = 0;
			sc->sc_mask = 0;
		}
		/* FALL THROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return error;
}

static int
vke_stop(struct vke_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	ifp->if_flags &= ~IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	if (sc) {
		if (sc->cotd_tx) {
			cothread_lock(sc->cotd_tx, 0);
			if (sc->cotd_tx_exit == VKE_COTD_RUN)
				sc->cotd_tx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_tx);
			cothread_unlock(sc->cotd_tx, 0);
			cothread_delete(&sc->cotd_tx);
		}
		if (sc->cotd_rx) {
			cothread_lock(sc->cotd_rx, 0);
			if (sc->cotd_rx_exit == VKE_COTD_RUN)
				sc->cotd_rx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_rx);
			cothread_unlock(sc->cotd_rx, 0);
			cothread_delete(&sc->cotd_rx);
		}

		for (i = 0; i < sc->sc_ringsize; i++) {
			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
				m_freem(sc->sc_rxfifo->array[i]);
				sc->sc_rxfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
				m_freem(sc->sc_txfifo->array[i]);
				sc->sc_txfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
				m_freem(sc->sc_txfifo_done->array[i]);
				sc->sc_txfifo_done->array[i] = NULL;
			}
		}

		if (sc->sc_txfifo) {
			if (sc->sc_txfifo->array)
				kfree(sc->sc_txfifo->array, M_DEVBUF);
			kfree(sc->sc_txfifo, M_DEVBUF);
			sc->sc_txfifo = NULL;
		}

		if (sc->sc_txfifo_done) {
			if (sc->sc_txfifo_done->array)
				kfree(sc->sc_txfifo_done->array, M_DEVBUF);
			kfree(sc->sc_txfifo_done, M_DEVBUF);
			sc->sc_txfifo_done = NULL;
		}

		if (sc->sc_rxfifo) {
			if (sc->sc_rxfifo->array)
				kfree(sc->sc_rxfifo->array, M_DEVBUF);
			kfree(sc->sc_rxfifo, M_DEVBUF);
			sc->sc_rxfifo = NULL;
		}
	}

	return 0;
}

/*
 * vke_rx_intr() is the interrupt function for the receive cothread.
 */
static void
vke_rx_intr(cothread_t cotd)
{
	struct mbuf *m;
	struct mbuf *nm;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	static int count = 0;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);

	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_ipackets) {
		IFNET_STAT_INC(ifp, ipackets, 1);
		sc->cotd_ipackets = 0;
	}
	cothread_unlock(cotd, 0);

	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (nm) {
			vke_rxfifo_dequeue(sc, nm);
			ifp->if_input(ifp, m, NULL, -1);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			vke_rxfifo_dequeue(sc, m);
		}
	}

	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
	ifnet_deserialize_all(ifp);
}

/*
 * vke_tx_intr() is the interrupt function for the transmit cothread.
 * Calls vke_start() to handle processing transmit mbufs.
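 *
 * It also folds the opackets/oerrors counts accumulated by the cothread
 * (which has no per-cpu globaldata and therefore cannot use
 * IFNET_STAT_INC) into the ifnet statistics and frees mbufs the cothread
 * has finished with.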
 */
static void
vke_tx_intr(cothread_t cotd)
{
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct mbuf *m;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);
	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_opackets) {
		IFNET_STAT_INC(ifp, opackets, 1);
		sc->cotd_opackets = 0;
	}
	if (sc->cotd_oerrors) {
		IFNET_STAT_INC(ifp, oerrors, 1);
		sc->cotd_oerrors = 0;
	}
	cothread_unlock(cotd, 0);

	/*
	 * Free TX mbufs that have been processed before starting new
	 * ones, to be pipeline friendly.
	 */
	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
		m_freem(m);
	}

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		if_devstart(ifp);

	ifnet_deserialize_all(ifp);
}

/*
 * vke_rx_thread() is the body of the receive cothread.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_rx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	fifo_t fifo = sc->sc_rxfifo;
	fd_set fdset;
	struct timeval tv;
	int count;
	int n;
	int r;

	/*
	 * Select timeout cannot be infinite since we need to check for
	 * the exit flag sc->cotd_rx_exit.
	 */
	tv.tv_sec = 0;
	tv.tv_usec = 500000;

	FD_ZERO(&fdset);
	count = 0;

	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
		/*
		 * Wait for the RX FIFO to be loaded with
		 * empty mbufs.
		 */
		if (NETFIFOINDEX(fifo->windex + 1, sc) ==
		    NETFIFOINDEX(fifo->rindex, sc)) {
			usleep(20000);
			continue;
		}

		/*
		 * Load data into the rx fifo
		 */
		cpu_lfence();
		m = fifo->array[NETFIFOINDEX(fifo->windex, sc)];
		if (m == NULL) {
			fprintf(stderr,
			    VKE_DEVNAME "%d: NULL rxring mbuf\n",
			    sc->sc_unit);
			*(volatile int *)0 = 1;
		}
		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
		if (n > 0) {
			/* no mycpu in cothread */
			/*IFNET_STAT_INC(ifp, ipackets, 1);*/
			++sc->cotd_ipackets;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = m->m_len = n;
			cpu_sfence();
			++fifo->windex;
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			FD_SET(sc->sc_fd, &fdset);
			r = select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv);
			if (r == -1) {
				fprintf(stderr,
				    VKE_DEVNAME "%d: select failed for "
				    "TAP device\n", sc->sc_unit);
				usleep(1000000);
			}
		}
	}
	cpu_sfence();
	sc->cotd_rx_exit = VKE_COTD_DEAD;
}

/*
 * vke_tx_thread() is the body of the transmit cothread.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
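 *
 * Each dequeued mbuf chain is linearized into sc_txbuf before being
 * written to sc_fd, then recycled through the done fifo so the kernel
 * side can free it.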
 */
static void
vke_tx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	/*struct ifnet *ifp = &sc->arpcom.ac_if;*/
	int count = 0;

	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
		/*
		 * Write outgoing packets to the TAP interface
		 */
		m = vke_txfifo_dequeue(sc);
		if (m) {
			if (m->m_pkthdr.len <= MCLBYTES) {
				m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
				sc->sc_txbuf_len = m->m_pkthdr.len;

				if (write(sc->sc_fd, sc->sc_txbuf,
					  sc->sc_txbuf_len) < 0) {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, oerrors, 1);*/
					++sc->cotd_oerrors;
				} else {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, opackets, 1);*/
					++sc->cotd_opackets;
				}
			}
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
			vke_txfifo_done_enqueue(sc, m);
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			cothread_lock(cotd, 1);
			if (vke_txfifo_empty(sc))
				cothread_wait(cotd);
			cothread_unlock(cotd, 1);
		}
	}
	cpu_sfence();
	sc->cotd_tx_exit = VKE_COTD_DEAD;
}

static int
vke_attach(const struct vknetif_info *info, int unit)
{
	struct vke_softc *sc;
	struct ifnet *ifp;
	struct tapinfo tapinfo;
	uint8_t enaddr[ETHER_ADDR_LEN];
	int nmbufs;
	int fd;

	KKASSERT(info->tap_fd >= 0);
	fd = info->tap_fd;

	if (info->enaddr) {
		/*
		 * enaddr is supplied
		 */
		bcopy(info->enaddr, enaddr, ETHER_ADDR_LEN);
	} else {
		/*
		 * This is only a TAP device if tap_unit is non-negative.
		 * If connecting to a virtual socket we generate a unique MAC.
		 *
		 * WARNING: enaddr[0] bit 0 is the multicast bit, when
		 *          randomizing enaddr[] just leave the first
		 *          two bytes 00 00 for now.
		 */
		bzero(enaddr, sizeof(enaddr));
		if (info->tap_unit >= 0) {
			if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
				    "failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}

			if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
				    "failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}
		} else {
			int fd = open("/dev/urandom", O_RDONLY);

			if (fd >= 0) {
				read(fd, enaddr + 2, 4);
				close(fd);
			}
			enaddr[4] = (int)getpid() >> 8;
			enaddr[5] = (int)getpid() & 255;
		}
		enaddr[1] += 1;
	}
	if (ETHER_IS_MULTICAST(enaddr)) {
		kprintf(VKE_DEVNAME "%d: illegal MULTICAST ether mac!\n", unit);
		return ENXIO;
	}

	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
	sc->sc_fd = fd;
	sc->sc_unit = unit;
	sc->sc_tap_unit = info->tap_unit;
	sc->sc_addr = info->netif_addr;
	sc->sc_mask = info->netif_mask;

	if (vke_max_ringsize == 0) {
		nmbufs = nmbclusters / (NetifNum * 2);
		sc->sc_ringsize = LOW_POW_2(nmbufs);
		if (sc->sc_ringsize > VKE_DEFAULT_RINGSIZE)
			sc->sc_ringsize = VKE_DEFAULT_RINGSIZE;
	} else if (vke_max_ringsize >= VKE_CHUNK) {	/* Tunable specified */
		sc->sc_ringsize = LOW_POW_2(vke_max_ringsize);
	} else {
		sc->sc_ringsize = LOW_POW_2(VKE_CHUNK);
	}
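
	/*
	 * Ring-size example (illustrative numbers): with nmbclusters = 8192
	 * and two configured interfaces, nmbufs = 8192 / (2 * 2) = 2048 and
	 * LOW_POW_2(2048) = 2048, which is then clamped to
	 * VKE_DEFAULT_RINGSIZE (256).  A non-zero hw.vke.max_ringsize is
	 * rounded down to a power of two; values below VKE_CHUNK fall back
	 * to LOW_POW_2(VKE_CHUNK).
	 */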

	ifp = &sc->arpcom.ac_if;
	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);

	/* NB: after if_initname() */
	sysctl_ctx_init(&sc->sc_sysctl_ctx);
	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO, ifp->if_xname,
	    CTLFLAG_RD, 0, "");
	if (sc->sc_sysctl_tree == NULL) {
		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
	} else {
		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sc_sysctl_tree),
		    OID_AUTO, "tap_unit",
		    CTLFLAG_RD, &sc->sc_tap_unit, 0,
		    "Backend tap(4) unit");
	}

	ifp->if_softc = sc;
	ifp->if_ioctl = vke_ioctl;
	ifp->if_start = vke_start;
	ifp->if_init = vke_init;
	ifp->if_mtu = tapinfo.mtu;
	ifp->if_baudrate = tapinfo.baudrate;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifq_set_ready(&ifp->if_snd);

	ifmedia_init(&sc->sc_media, 0, vke_media_change, vke_media_status);
	/*
	 * We support as many media types as we please for
	 * debugging purposes.
	 */
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_2, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_5, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_FX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_T4, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_VG, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_T2, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_FX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_STP, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_FL, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_LX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_CX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_T, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_HPNA_1, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_LR, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_SR, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_CX4, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_2500_SX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_LRM, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_40G_CR4, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_40G_SR4, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_40G_LR4, 0, NULL);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);

	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	ifp->if_link_state = LINK_STATE_UP;

	ether_ifattach(ifp, enaddr, NULL);

	if (bootverbose && sc->sc_addr != 0) {
		if_printf(ifp, "pre-configured "
		    "address 0x%08x, netmask 0x%08x, %d mbuf clusters\n",
		    ntohl(sc->sc_addr), ntohl(sc->sc_mask), sc->sc_ringsize);
	}

	return 0;
}

static int
vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
{
	struct ifaliasreq ifra;
	struct sockaddr_in *sin;
	int ret;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if (bootverbose) {
		if_printf(ifp, "add pre-configured "
		    "address 0x%08x, netmask 0x%08x\n",
		    ntohl(addr), ntohl(mask));
	}

	bzero(&ifra, sizeof(ifra));

	/* NB: no need to set ifaliasreq.ifra_name */

	sin = (struct sockaddr_in *)&ifra.ifra_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr.s_addr = addr;

	if (mask != 0) {
		sin = (struct sockaddr_in *)&ifra.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr.s_addr = mask;
	}

	/*
	 * Temporarily release serializer, in_control() will hold
	 * it again before calling ifnet.if_ioctl().
	 */
	ifnet_deserialize_all(ifp);
	ret = in_control(SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
	ifnet_serialize_all(ifp);

	return ret;
}

static int
vke_media_change(struct ifnet *ifp)
{
	/* ignored */
	return (0);
}

static void
vke_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct vke_softc *sc = (struct vke_softc *)ifp->if_softc;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_status |= IFM_ACTIVE;

	if (sc->sc_media.ifm_cur) {
		if (sc->sc_media.ifm_cur->ifm_media == IFM_ETHER) {
			imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
		} else {
			imr->ifm_active = sc->sc_media.ifm_cur->ifm_media;
		}
	} else {
		imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
	}
}