/*
 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Sepherosa Ziehau <sepherosa@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>

#include <machine/md_var.h>
#include <machine/cothread.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/bpf.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>

#include <netinet/in_var.h>

#include <sys/stat.h>
#include <net/tap/if_tap.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define VKE_DEVNAME	"vke"

#define VKE_CHUNK	8	/* number of mbufs to queue before interrupting */

#define NETFIFOINDEX(u, sc)	((u) & ((sc)->sc_ringsize - 1))

#define VKE_COTD_RUN	0
#define VKE_COTD_EXIT	1
#define VKE_COTD_DEAD	2

struct vke_fifo {
	struct mbuf	**array;
	int		rindex;
	int		windex;
};
typedef struct vke_fifo *fifo_t;

/* Default ring size; also the cap when auto-sized from nmbclusters */
#define VKE_DEFAULT_RINGSIZE	256
static int vke_max_ringsize = 0;
TUNABLE_INT("hw.vke.max_ringsize", &vke_max_ringsize);

/* Round n down to the nearest power of 2 */
#define LOW_POW_2(n)	(1 << (fls(n) - 1))

struct vke_softc {
	struct arpcom		arpcom;
	int			sc_fd;
	int			sc_unit;

	cothread_t		cotd_tx;
	cothread_t		cotd_rx;

	int			cotd_tx_exit;
	int			cotd_rx_exit;

	void			*sc_txbuf;
	int			sc_txbuf_len;

	fifo_t			sc_txfifo;
	fifo_t			sc_txfifo_done;
	fifo_t			sc_rxfifo;

	int			sc_ringsize;

	long			cotd_ipackets;
	long			cotd_oerrors;
	long			cotd_opackets;

	struct sysctl_ctx_list	sc_sysctl_ctx;
	struct sysctl_oid	*sc_sysctl_tree;

	int			sc_tap_unit;	/* unit of backend tap(4) */
	in_addr_t		sc_addr;	/* address */
	in_addr_t		sc_mask;	/* netmask */
};
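
/*
 * FIFO ownership model (all three rings are single-producer /
 * single-consumer):
 *
 *	sc_txfifo	kernel -> TX cothread	packets to write to the tap
 *	sc_txfifo_done	TX cothread -> kernel	transmitted mbufs to free
 *	sc_rxfifo	RX cothread -> kernel	received packets; the kernel
 *						refills each slot with an
 *						empty mbuf
 *
 * Only the producer advances windex and only the consumer advances
 * rindex; cpu_sfence()/cpu_lfence() order the array accesses against
 * the index updates, so the rings need no lock.
 */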

static void	vke_start(struct ifnet *, struct ifaltq_subque *);
static void	vke_init(void *);
static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);

static int	vke_attach(const struct vknetif_info *, int);
static int	vke_stop(struct vke_softc *);
static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
static void	vke_tx_intr(cothread_t cotd);
static void	vke_tx_thread(cothread_t cotd);
static void	vke_rx_intr(cothread_t cotd);
static void	vke_rx_thread(cothread_t cotd);

static int	vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);

static int	vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_done_dequeue(struct vke_softc *sc,
			struct mbuf *nm);

static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);

static void
vke_sysinit(void *arg __unused)
{
	int i, unit;

	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));

	unit = 0;
	for (i = 0; i < NetifNum; ++i) {
		if (vke_attach(&NetifInfo[i], unit) == 0)
			++unit;
	}
}
SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);

/*
 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.
 * Since the cothread cannot free transmit mbufs after processing, we
 * put them on the done fifo so the kernel can free them.
 */
static int
vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo_done;

	while (NETFIFOINDEX(fifo->windex + 1, sc) ==
	       NETFIFOINDEX(fifo->rindex, sc)) {
		usleep(20000);
	}

	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;
	return (0);
}

/*
 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
 */
static struct mbuf *
vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
{
	fifo_t fifo = sc->sc_txfifo_done;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = nm;
	cpu_lfence();
	++fifo->rindex;
	return (m);
}

/*
 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.  Returns -1
 * without queueing the mbuf when the fifo is full.
 */
static int
vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->windex + 1, sc) == NETFIFOINDEX(fifo->rindex, sc))
		return (-1);

	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}
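
/*
 * Note the asymmetry between the two transmit-side enqueues:
 * vke_txfifo_done_enqueue() busy-waits because the TX cothread must
 * never lose an mbuf it has already dequeued, while
 * vke_txfifo_enqueue() fails fast so vke_start() can simply drop the
 * packet when the ring is full.
 */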

/*
 * vke_txfifo_dequeue() - Return the next mbuf on the transmit fifo if
 * one exists.
 */
static struct mbuf *
vke_txfifo_dequeue(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = NULL;

	cpu_lfence();
	++fifo->rindex;
	return (m);
}

/*
 * vke_txfifo_empty() - Return non-zero if the transmit fifo is empty.
 */
static int
vke_txfifo_empty(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (1);
	return (0);
}

/*
 * vke_rxfifo_dequeue() - Return the next mbuf on the receive fifo if one
 * exists, replacing it with newm, which should point to a newly allocated
 * mbuf.
 */
static struct mbuf *
vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = newm;
	cpu_lfence();
	++fifo->rindex;
	return (m);
}

/*
 * Return the next mbuf if available but do NOT remove it from the FIFO.
 */
static struct mbuf *
vke_rxfifo_sniff(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	cpu_lfence();
	return (m);
}

static void
vke_init(void *xsc)
{
	struct vke_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	size_t ringsize = sc->sc_ringsize * sizeof(struct mbuf *);
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	vke_stop(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	/*
	 * Allocate memory for FIFO structures and mbufs.
	 */
	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo->array = kmalloc(ringsize, M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done->array = kmalloc(ringsize, M_DEVBUF,
	    M_WAITOK | M_ZERO);
	sc->sc_rxfifo->array = kmalloc(ringsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/* Pre-load the RX ring with empty mbufs for the RX cothread */
	for (i = 0; i < sc->sc_ringsize; i++) {
		sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
		sc->sc_txfifo->array[i] = NULL;
		sc->sc_txfifo_done->array[i] = NULL;
	}

	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");

	if (sc->sc_addr != 0) {
		in_addr_t addr, mask;

		addr = sc->sc_addr;
		mask = sc->sc_mask;

		/*
		 * Make sure the vkernel-assigned address will not be
		 * added again.
		 */
		sc->sc_addr = 0;
		sc->sc_mask = 0;

		vke_init_addr(ifp, addr, mask);
	}
}
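
/*
 * Transmit batching: the TX cothread is signalled once per VKE_CHUNK
 * mbufs queued, plus once more for any partial chunk, rather than once
 * per packet, to reduce signalling overhead.
 */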

/*
 * Called from kernel.
 *
 * NOTE: We can't make any kernel callbacks while holding a cothread
 *	 lock because the cothread lock is not governed by the kernel
 *	 scheduler (so mplock, tokens, etc will not be released).
 */
static void
vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct vke_softc *sc = ifp->if_softc;
	struct mbuf *m;
	cothread_t cotd = sc->cotd_tx;
	int count;

	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ASSERT_SERIALIZED(ifp->if_serializer);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	count = 0;
	while ((m = ifsq_dequeue(ifsq)) != NULL) {
		if (vke_txfifo_enqueue(sc, m) != -1) {
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			/* TX ring full, drop the packet */
			m_freem(m);
		}
	}
	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
}

static int
vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct vke_softc *sc = ifp->if_softc;
	int error = 0;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vke_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vke_stop(sc);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = EOPNOTSUPP;
		/* TODO */
		break;
	case SIOCGIFSTATUS: {
		struct ifstat *ifs = (struct ifstat *)data;
		int len;

		len = strlen(ifs->ascii);
		if (len < sizeof(ifs->ascii)) {
			if (sc->sc_tap_unit >= 0)
				ksnprintf(ifs->ascii + len,
				    sizeof(ifs->ascii) - len,
				    "\tBacked by tap%d\n", sc->sc_tap_unit);
		}
		break;
	}
	case SIOCSIFADDR:
		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
			/*
			 * If we are explicitly asked to change the address,
			 * invalidate the address/netmask passed in on the
			 * vkernel command line.
			 */
			sc->sc_addr = 0;
			sc->sc_mask = 0;
		}
		/* FALL THROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return error;
}
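
/*
 * vke_stop() - Halt both cothreads before touching the rings: the
 * mbufs and ring arrays are freed only after cothread_delete()
 * returns, so neither cothread can reference freed memory.
 */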

static int
vke_stop(struct vke_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	ifp->if_flags &= ~IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	if (sc) {
		if (sc->cotd_tx) {
			cothread_lock(sc->cotd_tx, 0);
			if (sc->cotd_tx_exit == VKE_COTD_RUN)
				sc->cotd_tx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_tx);
			cothread_unlock(sc->cotd_tx, 0);
			cothread_delete(&sc->cotd_tx);
		}
		if (sc->cotd_rx) {
			cothread_lock(sc->cotd_rx, 0);
			if (sc->cotd_rx_exit == VKE_COTD_RUN)
				sc->cotd_rx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_rx);
			cothread_unlock(sc->cotd_rx, 0);
			cothread_delete(&sc->cotd_rx);
		}

		for (i = 0; i < sc->sc_ringsize; i++) {
			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
				m_freem(sc->sc_rxfifo->array[i]);
				sc->sc_rxfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
				m_freem(sc->sc_txfifo->array[i]);
				sc->sc_txfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
				m_freem(sc->sc_txfifo_done->array[i]);
				sc->sc_txfifo_done->array[i] = NULL;
			}
		}

		if (sc->sc_txfifo) {
			if (sc->sc_txfifo->array)
				kfree(sc->sc_txfifo->array, M_DEVBUF);
			kfree(sc->sc_txfifo, M_DEVBUF);
			sc->sc_txfifo = NULL;
		}

		if (sc->sc_txfifo_done) {
			if (sc->sc_txfifo_done->array)
				kfree(sc->sc_txfifo_done->array, M_DEVBUF);
			kfree(sc->sc_txfifo_done, M_DEVBUF);
			sc->sc_txfifo_done = NULL;
		}

		if (sc->sc_rxfifo) {
			if (sc->sc_rxfifo->array)
				kfree(sc->sc_rxfifo->array, M_DEVBUF);
			kfree(sc->sc_rxfifo, M_DEVBUF);
			sc->sc_rxfifo = NULL;
		}
	}

	return 0;
}

/*
 * vke_rx_intr() is the interrupt function for the receive cothread.
 */
static void
vke_rx_intr(cothread_t cotd)
{
	struct mbuf *m;
	struct mbuf *nm;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	static int count = 0;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);

	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_ipackets) {
		/* Flush the packet count accumulated by the cothread */
		IFNET_STAT_INC(ifp, ipackets, sc->cotd_ipackets);
		sc->cotd_ipackets = 0;
	}
	cothread_unlock(cotd, 0);

	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
		if (nm) {
			vke_rxfifo_dequeue(sc, nm);
			ifp->if_input(ifp, m);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			/* Allocation failed, recycle the mbuf (packet drop) */
			vke_rxfifo_dequeue(sc, m);
		}
	}

	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
	ifnet_deserialize_all(ifp);
}
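
/*
 * RX flow: vke_init() pre-loads sc_rxfifo with empty mbufs.  The RX
 * cothread read()s the tap into the mbuf at windex and advances it;
 * vke_rx_intr() then feeds each filled mbuf to if_input() and replaces
 * its ring slot with a freshly allocated mbuf.
 */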

/*
 * vke_tx_intr() is the interrupt function for the transmit cothread.
 * Calls vke_start() to continue processing transmit mbufs.
 */
static void
vke_tx_intr(cothread_t cotd)
{
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct mbuf *m;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);
	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_opackets) {
		/* Flush the packet count accumulated by the cothread */
		IFNET_STAT_INC(ifp, opackets, sc->cotd_opackets);
		sc->cotd_opackets = 0;
	}
	if (sc->cotd_oerrors) {
		IFNET_STAT_INC(ifp, oerrors, sc->cotd_oerrors);
		sc->cotd_oerrors = 0;
	}
	cothread_unlock(cotd, 0);

	/*
	 * Free TX mbufs that have been processed before starting new
	 * ones, to stay pipeline friendly.
	 */
	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
		m_freem(m);
	}

	/* Resume transmission now that ring entries have been freed */
	if (ifp->if_flags & IFF_RUNNING)
		if_devstart(ifp);

	ifnet_deserialize_all(ifp);
}

/*
 * vke_rx_thread() is the body of the receive cothread.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_rx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	fifo_t fifo = sc->sc_rxfifo;
	fd_set fdset;
	struct timeval tv;
	int count;
	int n;

	/*
	 * The select() timeout cannot be infinite since we need to
	 * check for the exit flag sc->cotd_rx_exit.
	 */
	tv.tv_sec = 0;
	tv.tv_usec = 500000;

	FD_ZERO(&fdset);
	count = 0;

	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
		/*
		 * Wait for the RX FIFO to be loaded with
		 * empty mbufs.
		 */
		if (NETFIFOINDEX(fifo->windex + 1, sc) ==
		    NETFIFOINDEX(fifo->rindex, sc)) {
			usleep(20000);
			continue;
		}

		/*
		 * Load data into the rx fifo
		 */
		m = fifo->array[NETFIFOINDEX(fifo->windex, sc)];
		if (m == NULL)
			continue;
		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
		if (n > 0) {
			/* no mycpu in cothread */
			/*IFNET_STAT_INC(ifp, ipackets, 1);*/
			++sc->cotd_ipackets;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = m->m_len = n;
			cpu_sfence();
			++fifo->windex;
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			FD_SET(sc->sc_fd, &fdset);

			if (select(sc->sc_fd + 1, &fdset, NULL, NULL,
				   &tv) == -1) {
				fprintf(stderr,
					VKE_DEVNAME "%d: select failed for "
					"TAP device\n", sc->sc_unit);
				usleep(1000000);
			}
		}
	}
	cpu_sfence();
	sc->cotd_rx_exit = VKE_COTD_DEAD;
}
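
/*
 * Notification model: the cothreads poke the kernel side with
 * cothread_intr(), which causes the vke_*_intr() callbacks above to
 * run, while the kernel side wakes a sleeping cothread with
 * cothread_signal() (see cothread_wait() in vke_tx_thread() below).
 */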

/*
 * vke_tx_thread() is the body of the transmit cothread.
 *
 * WARNING! THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_tx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	/*struct ifnet *ifp = &sc->arpcom.ac_if;*/
	int count = 0;

	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
		/*
		 * Write outgoing packets to the TAP interface
		 */
		m = vke_txfifo_dequeue(sc);
		if (m) {
			/* Packets larger than MCLBYTES are silently dropped */
			if (m->m_pkthdr.len <= MCLBYTES) {
				m_copydata(m, 0, m->m_pkthdr.len,
					   sc->sc_txbuf);
				sc->sc_txbuf_len = m->m_pkthdr.len;

				if (write(sc->sc_fd, sc->sc_txbuf,
					  sc->sc_txbuf_len) < 0) {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, oerrors, 1);*/
					++sc->cotd_oerrors;
				} else {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, opackets, 1);*/
					++sc->cotd_opackets;
				}
			}
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
			vke_txfifo_done_enqueue(sc, m);
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			cothread_lock(cotd, 1);
			if (vke_txfifo_empty(sc))
				cothread_wait(cotd);
			cothread_unlock(cotd, 1);
		}
	}
	cpu_sfence();
	sc->cotd_tx_exit = VKE_COTD_DEAD;
}
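
/*
 * Ring sizing policy (applied in vke_attach() below): unless the
 * hw.vke.max_ringsize tunable is set, each interface gets the largest
 * power of two that fits nmbclusters / (NetifNum * 2), capped at
 * VKE_DEFAULT_RINGSIZE.  A tunable smaller than VKE_CHUNK falls back
 * to LOW_POW_2(VKE_CHUNK).
 */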

static int
vke_attach(const struct vknetif_info *info, int unit)
{
	struct vke_softc *sc;
	struct ifnet *ifp;
	struct tapinfo tapinfo;
	uint8_t enaddr[ETHER_ADDR_LEN];
	int nmbufs;
	int fd;

	KKASSERT(info->tap_fd >= 0);
	fd = info->tap_fd;

	/*
	 * Default MTU/baudrate, overridden via TAPGIFINFO below when a
	 * tap(4) backend is present.  Without this, tapinfo would be
	 * used uninitialized for non-tap backends.
	 */
	bzero(&tapinfo, sizeof(tapinfo));
	tapinfo.mtu = ETHERMTU;

	if (info->enaddr) {
		/*
		 * enaddr is supplied
		 */
		bcopy(info->enaddr, enaddr, ETHER_ADDR_LEN);
	} else {
		/*
		 * This is only a TAP device if tap_unit is non-negative.
		 * If connecting to a virtual socket we generate a unique
		 * MAC.
		 *
		 * WARNING: enaddr[0] bit 0 is the multicast bit; when
		 *	    randomizing enaddr[] just leave the first
		 *	    two bytes 00 00 for now.
		 */
		bzero(enaddr, sizeof(enaddr));
		if (info->tap_unit >= 0) {
			if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
					"failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}

			if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
					"failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}
		} else {
			/* rnd_fd renamed to avoid shadowing the tap fd */
			int rnd_fd = open("/dev/urandom", O_RDONLY);

			if (rnd_fd >= 0) {
				read(rnd_fd, enaddr + 2, 4);
				close(rnd_fd);
			}
			enaddr[4] = (int)getpid() >> 8;
			enaddr[5] = (int)getpid() & 255;
		}
		enaddr[1] += 1;
	}
	if (ETHER_IS_MULTICAST(enaddr)) {
		kprintf(VKE_DEVNAME "%d: illegal MULTICAST ether mac!\n", unit);
		return ENXIO;
	}

	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
	sc->sc_fd = fd;
	sc->sc_unit = unit;
	sc->sc_tap_unit = info->tap_unit;
	sc->sc_addr = info->netif_addr;
	sc->sc_mask = info->netif_mask;

	if (vke_max_ringsize == 0) {
		nmbufs = nmbclusters / (NetifNum * 2);
		sc->sc_ringsize = LOW_POW_2(nmbufs);
		if (sc->sc_ringsize > VKE_DEFAULT_RINGSIZE)
			sc->sc_ringsize = VKE_DEFAULT_RINGSIZE;
	} else if (vke_max_ringsize >= VKE_CHUNK) {	/* Tunable specified */
		sc->sc_ringsize = LOW_POW_2(vke_max_ringsize);
	} else {
		sc->sc_ringsize = LOW_POW_2(VKE_CHUNK);
	}

	ifp = &sc->arpcom.ac_if;
	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);

	/* NB: after if_initname() */
	sysctl_ctx_init(&sc->sc_sysctl_ctx);
	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO, ifp->if_xname,
	    CTLFLAG_RD, 0, "");
	if (sc->sc_sysctl_tree == NULL) {
		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
	} else {
		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sc_sysctl_tree),
		    OID_AUTO, "tap_unit",
		    CTLFLAG_RD, &sc->sc_tap_unit, 0,
		    "Backend tap(4) unit");
	}

	ifp->if_softc = sc;
	ifp->if_ioctl = vke_ioctl;
	ifp->if_start = vke_start;
	ifp->if_init = vke_init;
	ifp->if_mtu = tapinfo.mtu;
	ifp->if_baudrate = tapinfo.baudrate;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifq_set_ready(&ifp->if_snd);

	/* TODO: if_media */

	ether_ifattach(ifp, enaddr, NULL);

	if (bootverbose && sc->sc_addr != 0) {
		if_printf(ifp, "pre-configured "
		    "address 0x%08x, netmask 0x%08x, %d mbuf clusters\n",
		    ntohl(sc->sc_addr), ntohl(sc->sc_mask), sc->sc_ringsize);
	}

	return 0;
}

static int
vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
{
	struct ifaliasreq ifra;
	struct sockaddr_in *sin;
	int ret;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if (bootverbose) {
		if_printf(ifp, "add pre-configured "
		    "address 0x%08x, netmask 0x%08x\n",
		    ntohl(addr), ntohl(mask));
	}

	bzero(&ifra, sizeof(ifra));

	/* NB: no need to set ifaliasreq.ifra_name */

	sin = (struct sockaddr_in *)&ifra.ifra_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr.s_addr = addr;

	if (mask != 0) {
		sin = (struct sockaddr_in *)&ifra.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr.s_addr = mask;
	}

	/*
	 * Temporarily release the serializer; in_control() will hold
	 * it again before calling ifnet.if_ioctl().
	 */
	ifnet_deserialize_all(ifp);
	ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
	ifnet_serialize_all(ifp);

	return ret;
}