1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/endian.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/proc.h> 40 #include <sys/serialize.h> 41 #include <sys/socket.h> 42 #include <sys/sockio.h> 43 #include <sys/sysctl.h> 44 45 #include <machine/md_var.h> 46 #include <machine/cothread.h> 47 48 #include <net/ethernet.h> 49 #include <net/if.h> 50 #include <net/bpf.h> 51 #include <net/if_arp.h> 52 #include <net/if_media.h> 53 #include <net/ifq_var.h> 54 #include <net/vlan/if_vlan_ether.h> 55 56 #include <netinet/in_var.h> 57 58 #include <sys/stat.h> 59 #include <net/tap/if_tap.h> 60 #include <err.h> 61 #include <errno.h> 62 #include <stdio.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <fcntl.h> 66 67 #define VKE_DEVNAME "vke" 68 69 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */ 70 71 #define NETFIFOINDEX(u, sc) ((u) & ((sc)->sc_ringsize - 1)) 72 73 #define VKE_COTD_RUN 0 74 #define VKE_COTD_EXIT 1 75 #define VKE_COTD_DEAD 2 76 77 struct vke_fifo { 78 struct mbuf **array; 79 int rindex; 80 int windex; 81 }; 82 typedef struct vke_fifo *fifo_t; 83 84 /* Default value for a long time */ 85 #define VKE_DEFAULT_RINGSIZE 256 86 static int vke_max_ringsize = 0; 87 TUNABLE_INT("hw.vke.max_ringsize", &vke_max_ringsize); 88 89 #define LOW_POW_2(n) (1 << (fls(n) - 1)) 90 91 struct vke_softc { 92 struct arpcom arpcom; 93 int sc_fd; 94 int sc_unit; 95 96 cothread_t cotd_tx; 97 cothread_t cotd_rx; 98 99 int cotd_tx_exit; 100 int cotd_rx_exit; 101 102 void *sc_txbuf; 103 int sc_txbuf_len; 104 105 fifo_t sc_txfifo; 106 fifo_t sc_txfifo_done; 107 fifo_t sc_rxfifo; 108 109 int sc_ringsize; 110 111 long cotd_ipackets; 112 long cotd_oerrors; 113 long cotd_opackets; 114 115 struct sysctl_ctx_list sc_sysctl_ctx; 116 struct sysctl_oid *sc_sysctl_tree; 117 118 int sc_tap_unit; /* unit of backend tap(4) */ 119 in_addr_t sc_addr; /* address */ 120 in_addr_t sc_mask; /* netmask */ 121 122 struct ifmedia sc_media; 123 }; 124 125 static void vke_start(struct ifnet *, struct ifaltq_subque *); 126 static void vke_init(void *); 127 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 128 129 static int vke_media_change(struct ifnet *); 130 static void vke_media_status(struct ifnet *, struct ifmediareq *); 131 132 static int vke_attach(const struct vknetif_info *, int); 133 static int vke_stop(struct vke_softc *); 134 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t); 135 static void vke_tx_intr(cothread_t cotd); 136 static void vke_tx_thread(cothread_t cotd); 137 static void vke_rx_intr(cothread_t cotd); 138 static void vke_rx_thread(cothread_t cotd); 139 140 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m); 141 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc); 142 143 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m); 144 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm); 145 146 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm); 147 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc); 148 149 static void 150 vke_sysinit(void *arg __unused) 151 { 152 int i, unit; 153 154 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum)); 155 156 unit = 0; 157 for (i = 0; i < NetifNum; ++i) { 158 if (vke_attach(&NetifInfo[i], unit) == 0) 159 ++unit; 160 } 161 } 162 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL); 163 164 /* 165 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since 166 * the cothread cannot free transmit mbufs after processing we put them on 167 * the done fifo so the kernel can free them. 168 */ 169 static int 170 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m) 171 { 172 fifo_t fifo = sc->sc_txfifo_done; 173 174 while (NETFIFOINDEX(fifo->windex + 1, sc) == NETFIFOINDEX(fifo->rindex, sc)) { 175 usleep(20000); 176 } 177 178 fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m; 179 cpu_sfence(); 180 ++fifo->windex; 181 return (0); 182 } 183 184 /* 185 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo. 186 */ 187 static struct mbuf * 188 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm) 189 { 190 fifo_t fifo = sc->sc_txfifo_done; 191 struct mbuf *m; 192 193 if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc)) 194 return (NULL); 195 196 m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)]; 197 fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = nm; 198 cpu_lfence(); 199 ++fifo->rindex; 200 return (m); 201 } 202 203 /* 204 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo. 205 */ 206 static int 207 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m) 208 { 209 fifo_t fifo = sc->sc_txfifo; 210 211 if (NETFIFOINDEX(fifo->windex + 1, sc) == NETFIFOINDEX(fifo->rindex, sc)) 212 return (-1); 213 214 fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m; 215 cpu_sfence(); 216 ++fifo->windex; 217 218 return (0); 219 } 220 221 /* 222 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one 223 * exists. 224 */ 225 static struct mbuf * 226 vke_txfifo_dequeue(struct vke_softc *sc) 227 { 228 fifo_t fifo = sc->sc_txfifo; 229 struct mbuf *m; 230 231 if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc)) 232 return (NULL); 233 234 m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)]; 235 fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = NULL; 236 237 cpu_lfence(); 238 ++fifo->rindex; 239 return (m); 240 } 241 242 static int 243 vke_txfifo_empty(struct vke_softc *sc) 244 { 245 fifo_t fifo = sc->sc_txfifo; 246 247 if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc)) 248 return (1); 249 return(0); 250 } 251 252 /* 253 * vke_rxfifo_dequeue() - Return next mbuf on the receice fifo if one 254 * exists replacing it with newm which should point to a newly allocated 255 * mbuf. 256 */ 257 static struct mbuf * 258 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm) 259 { 260 fifo_t fifo = sc->sc_rxfifo; 261 struct mbuf *m; 262 263 if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc)) 264 return (NULL); 265 266 m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)]; 267 fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = newm; 268 cpu_lfence(); 269 ++fifo->rindex; 270 return (m); 271 } 272 273 /* 274 * Return the next mbuf if available but do NOT remove it from the FIFO. 275 */ 276 static struct mbuf * 277 vke_rxfifo_sniff(struct vke_softc *sc) 278 { 279 fifo_t fifo = sc->sc_rxfifo; 280 struct mbuf *m; 281 282 if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc)) 283 return (NULL); 284 285 m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)]; 286 cpu_lfence(); 287 return (m); 288 } 289 290 static void 291 vke_init(void *xsc) 292 { 293 struct vke_softc *sc = xsc; 294 struct ifnet *ifp = &sc->arpcom.ac_if; 295 size_t ringsize = sc->sc_ringsize * sizeof(struct mbuf *); 296 int i; 297 298 ASSERT_SERIALIZED(ifp->if_serializer); 299 300 vke_stop(sc); 301 302 ifp->if_flags |= IFF_RUNNING; 303 ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd)); 304 305 /* 306 * Allocate memory for FIFO structures and mbufs. 307 */ 308 sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), 309 M_DEVBUF, M_WAITOK | M_ZERO); 310 sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), 311 M_DEVBUF, M_WAITOK | M_ZERO); 312 sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), 313 M_DEVBUF, M_WAITOK | M_ZERO); 314 sc->sc_txfifo->array = kmalloc(ringsize, M_DEVBUF, M_WAITOK | M_ZERO); 315 sc->sc_txfifo_done->array = kmalloc(ringsize, M_DEVBUF, M_WAITOK | M_ZERO); 316 sc->sc_rxfifo->array = kmalloc(ringsize, M_DEVBUF, M_WAITOK | M_ZERO); 317 318 for (i = 0; i < sc->sc_ringsize; i++) { 319 sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR); 320 sc->sc_txfifo->array[i] = NULL; 321 sc->sc_txfifo_done->array[i] = NULL; 322 } 323 324 sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN; 325 sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx"); 326 sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx"); 327 328 if (sc->sc_addr != 0) { 329 in_addr_t addr, mask; 330 331 addr = sc->sc_addr; 332 mask = sc->sc_mask; 333 334 /* 335 * Make sure vkernel assigned 336 * address will not be added 337 * again. 338 */ 339 sc->sc_addr = 0; 340 sc->sc_mask = 0; 341 342 vke_init_addr(ifp, addr, mask); 343 } 344 345 } 346 347 /* 348 * Called from kernel. 349 * 350 * NOTE: We can't make any kernel callbacks while holding cothread lock 351 * because the cothread lock is not governed by the kernel scheduler 352 * (so mplock, tokens, etc will not be released). 353 */ 354 static void 355 vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 356 { 357 struct vke_softc *sc = ifp->if_softc; 358 struct mbuf *m; 359 cothread_t cotd = sc->cotd_tx; 360 int count; 361 362 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 363 ASSERT_SERIALIZED(ifp->if_serializer); 364 365 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 366 return; 367 368 count = 0; 369 while ((m = ifsq_dequeue(ifsq)) != NULL) { 370 if (vke_txfifo_enqueue(sc, m) != -1) { 371 ETHER_BPF_MTAP(ifp, m); 372 if (count++ == VKE_CHUNK) { 373 cothread_lock(cotd, 0); 374 cothread_signal(cotd); 375 cothread_unlock(cotd, 0); 376 count = 0; 377 } 378 } else { 379 m_freem(m); 380 } 381 } 382 if (count) { 383 cothread_lock(cotd, 0); 384 cothread_signal(cotd); 385 cothread_unlock(cotd, 0); 386 } 387 } 388 389 static int 390 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 391 { 392 struct vke_softc *sc = ifp->if_softc; 393 struct ifreq *ifr = (struct ifreq *)data; 394 int error = 0; 395 396 ASSERT_SERIALIZED(ifp->if_serializer); 397 398 switch (cmd) { 399 case SIOCSIFFLAGS: 400 if (ifp->if_flags & IFF_UP) { 401 if ((ifp->if_flags & IFF_RUNNING) == 0) 402 vke_init(sc); 403 } else { 404 if (ifp->if_flags & IFF_RUNNING) 405 vke_stop(sc); 406 } 407 break; 408 case SIOCGIFMEDIA: 409 case SIOCSIFMEDIA: 410 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 411 break; 412 case SIOCGIFSTATUS: { 413 struct ifstat *ifs = (struct ifstat *)data; 414 int len; 415 416 len = strlen(ifs->ascii); 417 if (len < sizeof(ifs->ascii)) { 418 if (sc->sc_tap_unit >= 0) 419 ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len, 420 "\tBacked by tap%d\n", sc->sc_tap_unit); 421 } 422 break; 423 } 424 case SIOCSIFADDR: 425 if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) { 426 /* 427 * If we are explicitly requested to change address, 428 * we should invalidate address/netmask passed in 429 * from vkernel command line. 430 */ 431 sc->sc_addr = 0; 432 sc->sc_mask = 0; 433 } 434 /* FALL THROUGH */ 435 default: 436 error = ether_ioctl(ifp, cmd, data); 437 break; 438 } 439 return error; 440 } 441 442 static int 443 vke_stop(struct vke_softc *sc) 444 { 445 struct ifnet *ifp = &sc->arpcom.ac_if; 446 int i; 447 448 ASSERT_SERIALIZED(ifp->if_serializer); 449 450 ifp->if_flags &= ~IFF_RUNNING; 451 ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd)); 452 453 if (sc) { 454 if (sc->cotd_tx) { 455 cothread_lock(sc->cotd_tx, 0); 456 if (sc->cotd_tx_exit == VKE_COTD_RUN) 457 sc->cotd_tx_exit = VKE_COTD_EXIT; 458 cothread_signal(sc->cotd_tx); 459 cothread_unlock(sc->cotd_tx, 0); 460 cothread_delete(&sc->cotd_tx); 461 } 462 if (sc->cotd_rx) { 463 cothread_lock(sc->cotd_rx, 0); 464 if (sc->cotd_rx_exit == VKE_COTD_RUN) 465 sc->cotd_rx_exit = VKE_COTD_EXIT; 466 cothread_signal(sc->cotd_rx); 467 cothread_unlock(sc->cotd_rx, 0); 468 cothread_delete(&sc->cotd_rx); 469 } 470 471 for (i = 0; i < sc->sc_ringsize; i++) { 472 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) { 473 m_freem(sc->sc_rxfifo->array[i]); 474 sc->sc_rxfifo->array[i] = NULL; 475 } 476 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) { 477 m_freem(sc->sc_txfifo->array[i]); 478 sc->sc_txfifo->array[i] = NULL; 479 } 480 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) { 481 m_freem(sc->sc_txfifo_done->array[i]); 482 sc->sc_txfifo_done->array[i] = NULL; 483 } 484 } 485 486 if (sc->sc_txfifo) { 487 if (sc->sc_txfifo->array) 488 kfree(sc->sc_txfifo->array, M_DEVBUF); 489 kfree(sc->sc_txfifo, M_DEVBUF); 490 sc->sc_txfifo = NULL; 491 } 492 493 if (sc->sc_txfifo_done) { 494 if (sc->sc_txfifo_done->array) 495 kfree(sc->sc_txfifo_done->array, M_DEVBUF); 496 kfree(sc->sc_txfifo_done, M_DEVBUF); 497 sc->sc_txfifo_done = NULL; 498 } 499 500 if (sc->sc_rxfifo) { 501 if (sc->sc_rxfifo->array) 502 kfree(sc->sc_rxfifo->array, M_DEVBUF); 503 kfree(sc->sc_rxfifo, M_DEVBUF); 504 sc->sc_rxfifo = NULL; 505 } 506 } 507 508 509 return 0; 510 } 511 512 /* 513 * vke_rx_intr() is the interrupt function for the receive cothread. 514 */ 515 static void 516 vke_rx_intr(cothread_t cotd) 517 { 518 struct mbuf *m; 519 struct mbuf *nm; 520 struct vke_softc *sc = cotd->arg; 521 struct ifnet *ifp = &sc->arpcom.ac_if; 522 static int count = 0; 523 524 ifnet_serialize_all(ifp); 525 cothread_lock(cotd, 0); 526 527 if (sc->cotd_rx_exit != VKE_COTD_RUN) { 528 cothread_unlock(cotd, 0); 529 ifnet_deserialize_all(ifp); 530 return; 531 } 532 if (sc->cotd_ipackets) { 533 IFNET_STAT_INC(ifp, ipackets, 1); 534 sc->cotd_ipackets = 0; 535 } 536 cothread_unlock(cotd, 0); 537 538 while ((m = vke_rxfifo_sniff(sc)) != NULL) { 539 nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); 540 if (nm) { 541 vke_rxfifo_dequeue(sc, nm); 542 ifp->if_input(ifp, m, NULL, -1); 543 if (count++ == VKE_CHUNK) { 544 cothread_lock(cotd, 0); 545 cothread_signal(cotd); 546 cothread_unlock(cotd, 0); 547 count = 0; 548 } 549 } else { 550 vke_rxfifo_dequeue(sc, m); 551 } 552 } 553 554 if (count) { 555 cothread_lock(cotd, 0); 556 cothread_signal(cotd); 557 cothread_unlock(cotd, 0); 558 } 559 ifnet_deserialize_all(ifp); 560 } 561 562 /* 563 * vke_tx_intr() is the interrupt function for the transmit cothread. 564 * Calls vke_start() to handle processing transmit mbufs. 565 */ 566 static void 567 vke_tx_intr(cothread_t cotd) 568 { 569 struct vke_softc *sc = cotd->arg; 570 struct ifnet *ifp = &sc->arpcom.ac_if; 571 struct mbuf *m; 572 573 ifnet_serialize_all(ifp); 574 cothread_lock(cotd, 0); 575 if (sc->cotd_tx_exit != VKE_COTD_RUN) { 576 cothread_unlock(cotd, 0); 577 ifnet_deserialize_all(ifp); 578 return; 579 } 580 if (sc->cotd_opackets) { 581 IFNET_STAT_INC(ifp, opackets, 1); 582 sc->cotd_opackets = 0; 583 } 584 if (sc->cotd_oerrors) { 585 IFNET_STAT_INC(ifp, oerrors, 1); 586 sc->cotd_oerrors = 0; 587 } 588 cothread_unlock(cotd, 0); 589 590 /* 591 * Free TX mbufs that have been processed before starting new 592 * ones going to be pipeline friendly. 593 */ 594 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) { 595 m_freem(m); 596 } 597 598 if ((ifp->if_flags & IFF_RUNNING) == 0) 599 if_devstart(ifp); 600 601 ifnet_deserialize_all(ifp); 602 } 603 604 /* 605 * vke_rx_thread() is the body of the receive cothread. 606 * 607 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!! 608 */ 609 static void 610 vke_rx_thread(cothread_t cotd) 611 { 612 struct mbuf *m; 613 struct vke_softc *sc = cotd->arg; 614 struct ifnet *ifp = &sc->arpcom.ac_if; 615 fifo_t fifo = sc->sc_rxfifo; 616 fd_set fdset; 617 struct timeval tv; 618 int count; 619 int n; 620 621 /* Select timeout cannot be infinite since we need to check for 622 * the exit flag sc->cotd_rx_exit. 623 */ 624 tv.tv_sec = 0; 625 tv.tv_usec = 500000; 626 627 FD_ZERO(&fdset); 628 count = 0; 629 630 while (sc->cotd_rx_exit == VKE_COTD_RUN) { 631 /* 632 * Wait for the RX FIFO to be loaded with 633 * empty mbufs. 634 */ 635 if (NETFIFOINDEX(fifo->windex + 1, sc) == 636 NETFIFOINDEX(fifo->rindex, sc)) { 637 usleep(20000); 638 continue; 639 } 640 641 /* 642 * Load data into the rx fifo 643 */ 644 m = fifo->array[NETFIFOINDEX(fifo->windex, sc)]; 645 if (m == NULL) 646 continue; 647 n = read(sc->sc_fd, mtod(m, void *), MCLBYTES); 648 if (n > 0) { 649 /* no mycpu in cothread */ 650 /*IFNET_STAT_INC(ifp, ipackets, 1);*/ 651 ++sc->cotd_ipackets; 652 m->m_pkthdr.rcvif = ifp; 653 m->m_pkthdr.len = m->m_len = n; 654 cpu_sfence(); 655 ++fifo->windex; 656 if (count++ == VKE_CHUNK) { 657 cothread_intr(cotd); 658 count = 0; 659 } 660 } else { 661 if (count) { 662 cothread_intr(cotd); 663 count = 0; 664 } 665 FD_SET(sc->sc_fd, &fdset); 666 667 if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) { 668 fprintf(stderr, 669 VKE_DEVNAME "%d: select failed for " 670 "TAP device\n", sc->sc_unit); 671 usleep(1000000); 672 } 673 } 674 } 675 cpu_sfence(); 676 sc->cotd_rx_exit = VKE_COTD_DEAD; 677 } 678 679 /* 680 * vke_tx_thread() is the body of the transmit cothread. 681 * 682 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!! 683 */ 684 static void 685 vke_tx_thread(cothread_t cotd) 686 { 687 struct mbuf *m; 688 struct vke_softc *sc = cotd->arg; 689 /*struct ifnet *ifp = &sc->arpcom.ac_if;*/ 690 int count = 0; 691 692 while (sc->cotd_tx_exit == VKE_COTD_RUN) { 693 /* 694 * Write outgoing packets to the TAP interface 695 */ 696 m = vke_txfifo_dequeue(sc); 697 if (m) { 698 if (m->m_pkthdr.len <= MCLBYTES) { 699 m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf); 700 sc->sc_txbuf_len = m->m_pkthdr.len; 701 702 if (write(sc->sc_fd, sc->sc_txbuf, 703 sc->sc_txbuf_len) < 0) { 704 /* no mycpu in cothread */ 705 /*IFNET_STAT_INC(ifp, oerrors, 1);*/ 706 ++sc->cotd_oerrors; 707 } else { 708 /* no mycpu in cothread */ 709 /*IFNET_STAT_INC(ifp, opackets, 1);*/ 710 ++sc->cotd_opackets; 711 } 712 } 713 if (count++ == VKE_CHUNK) { 714 cothread_intr(cotd); 715 count = 0; 716 } 717 vke_txfifo_done_enqueue(sc, m); 718 } else { 719 if (count) { 720 cothread_intr(cotd); 721 count = 0; 722 } 723 cothread_lock(cotd, 1); 724 if (vke_txfifo_empty(sc)) 725 cothread_wait(cotd); 726 cothread_unlock(cotd, 1); 727 } 728 } 729 cpu_sfence(); 730 sc->cotd_tx_exit = VKE_COTD_DEAD; 731 } 732 733 static int 734 vke_attach(const struct vknetif_info *info, int unit) 735 { 736 struct vke_softc *sc; 737 struct ifnet *ifp; 738 struct tapinfo tapinfo; 739 uint8_t enaddr[ETHER_ADDR_LEN]; 740 int nmbufs; 741 int fd; 742 743 KKASSERT(info->tap_fd >= 0); 744 fd = info->tap_fd; 745 746 if (info->enaddr) { 747 /* 748 * enaddr is supplied 749 */ 750 bcopy(info->enaddr, enaddr, ETHER_ADDR_LEN); 751 } else { 752 /* 753 * This is only a TAP device if tap_unit is non-zero. If 754 * connecting to a virtual socket we generate a unique MAC. 755 * 756 * WARNING: enaddr[0] bit 0 is the multicast bit, when 757 * randomizing enaddr[] just leave the first 758 * two bytes 00 00 for now. 759 */ 760 bzero(enaddr, sizeof(enaddr)); 761 if (info->tap_unit >= 0) { 762 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) { 763 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) " 764 "failed: %s\n", unit, strerror(errno)); 765 return ENXIO; 766 } 767 768 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) { 769 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) " 770 "failed: %s\n", unit, strerror(errno)); 771 return ENXIO; 772 } 773 } else { 774 int fd = open("/dev/urandom", O_RDONLY); 775 if (fd >= 0) { 776 read(fd, enaddr + 2, 4); 777 close(fd); 778 } 779 enaddr[4] = (int)getpid() >> 8; 780 enaddr[5] = (int)getpid() & 255; 781 782 } 783 enaddr[1] += 1; 784 } 785 if (ETHER_IS_MULTICAST(enaddr)) { 786 kprintf(VKE_DEVNAME "%d: illegal MULTICAST ether mac!\n", unit); 787 return ENXIO; 788 } 789 790 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 791 792 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK); 793 sc->sc_fd = fd; 794 sc->sc_unit = unit; 795 sc->sc_tap_unit = info->tap_unit; 796 sc->sc_addr = info->netif_addr; 797 sc->sc_mask = info->netif_mask; 798 799 if (vke_max_ringsize == 0) { 800 nmbufs = nmbclusters / (NetifNum * 2); 801 sc->sc_ringsize = LOW_POW_2(nmbufs); 802 if (sc->sc_ringsize > VKE_DEFAULT_RINGSIZE) 803 sc->sc_ringsize = VKE_DEFAULT_RINGSIZE; 804 } else if (vke_max_ringsize >= VKE_CHUNK) { /* Tunable specified */ 805 sc->sc_ringsize = LOW_POW_2(vke_max_ringsize); 806 } else { 807 sc->sc_ringsize = LOW_POW_2(VKE_CHUNK); 808 } 809 810 ifp = &sc->arpcom.ac_if; 811 if_initname(ifp, VKE_DEVNAME, sc->sc_unit); 812 813 /* NB: after if_initname() */ 814 sysctl_ctx_init(&sc->sc_sysctl_ctx); 815 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx, 816 SYSCTL_STATIC_CHILDREN(_hw), 817 OID_AUTO, ifp->if_xname, 818 CTLFLAG_RD, 0, ""); 819 if (sc->sc_sysctl_tree == NULL) { 820 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit); 821 } else { 822 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx, 823 SYSCTL_CHILDREN(sc->sc_sysctl_tree), 824 OID_AUTO, "tap_unit", 825 CTLFLAG_RD, &sc->sc_tap_unit, 0, 826 "Backend tap(4) unit"); 827 } 828 829 ifp->if_softc = sc; 830 ifp->if_ioctl = vke_ioctl; 831 ifp->if_start = vke_start; 832 ifp->if_init = vke_init; 833 ifp->if_mtu = tapinfo.mtu; 834 ifp->if_baudrate = tapinfo.baudrate; 835 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 836 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN); 837 ifq_set_ready(&ifp->if_snd); 838 839 ifmedia_init(&sc->sc_media, 0, vke_media_change, vke_media_status); 840 /* We support as many media types as we please for 841 debugging purposes */ 842 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T, 0, NULL); 843 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); 844 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_2, 0, NULL); 845 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_5, 0, NULL); 846 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX, 0, NULL); 847 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); 848 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_FX, 0, NULL); 849 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_T4, 0, NULL); 850 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_VG, 0, NULL); 851 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_100_T2, 0, NULL); 852 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_FX, 0, NULL); 853 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_STP, 0, NULL); 854 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10_FL, 0, NULL); 855 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_SX, 0, NULL); 856 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_LX, 0, NULL); 857 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_CX, 0, NULL); 858 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_T, 0, NULL); 859 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 860 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_HPNA_1, 0, NULL); 861 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_LR, 0, NULL); 862 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_SR, 0, NULL); 863 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_CX4, 0, NULL); 864 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_2500_SX, 0, NULL); 865 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); 866 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL); 867 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_LRM, 0, NULL); 868 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_10G_T, 0, NULL); 869 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_40G_CR4, 0, NULL); 870 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_40G_SR4, 0, NULL); 871 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_40G_LR4, 0, NULL); 872 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 873 874 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 875 876 ifp->if_link_state = LINK_STATE_UP; 877 878 ether_ifattach(ifp, enaddr, NULL); 879 880 if (bootverbose && sc->sc_addr != 0) { 881 if_printf(ifp, "pre-configured " 882 "address 0x%08x, netmask 0x%08x, %d mbuf clusters\n", 883 ntohl(sc->sc_addr), ntohl(sc->sc_mask), sc->sc_ringsize); 884 } 885 886 return 0; 887 } 888 889 static int 890 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask) 891 { 892 struct ifaliasreq ifra; 893 struct sockaddr_in *sin; 894 int ret; 895 896 ASSERT_SERIALIZED(ifp->if_serializer); 897 898 if (bootverbose) { 899 if_printf(ifp, "add pre-configured " 900 "address 0x%08x, netmask 0x%08x\n", 901 ntohl(addr), ntohl(mask)); 902 } 903 904 bzero(&ifra, sizeof(ifra)); 905 906 /* NB: no need to set ifaliasreq.ifra_name */ 907 908 sin = (struct sockaddr_in *)&ifra.ifra_addr; 909 sin->sin_family = AF_INET; 910 sin->sin_len = sizeof(*sin); 911 sin->sin_addr.s_addr = addr; 912 913 if (mask != 0) { 914 sin = (struct sockaddr_in *)&ifra.ifra_mask; 915 sin->sin_len = sizeof(*sin); 916 sin->sin_addr.s_addr = mask; 917 } 918 919 /* 920 * Temporarily release serializer, in_control() will hold 921 * it again before calling ifnet.if_ioctl(). 922 */ 923 ifnet_deserialize_all(ifp); 924 ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL); 925 ifnet_serialize_all(ifp); 926 927 return ret; 928 } 929 930 static int vke_media_change(struct ifnet *ifp) 931 { 932 /* ignored */ 933 return(0); 934 } 935 936 static void vke_media_status(struct ifnet *ifp, struct ifmediareq *imr) 937 { 938 struct vke_softc *sc = (struct vke_softc *)ifp->if_softc; 939 940 imr->ifm_status = IFM_AVALID; 941 imr->ifm_status |= IFM_ACTIVE; 942 943 if(sc->sc_media.ifm_cur) { 944 if(sc->sc_media.ifm_cur->ifm_media == IFM_ETHER) { 945 imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX; 946 } else { 947 imr->ifm_active = sc->sc_media.ifm_cur->ifm_media; 948 } 949 } else { 950 imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX; 951 } 952 } 953