1 /* $NetBSD: xennetback_xenbus.c,v 1.93 2020/04/06 19:52:38 jdolecek Exp $ */ 2 3 /* 4 * Copyright (c) 2006 Manuel Bouyer. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.93 2020/04/06 19:52:38 jdolecek Exp $");

#include "opt_xen.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/device.h>
#include <sys/intr.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/bpf.h>

#include <net/if_ether.h>

#include <xen/xen.h>
#include <xen/xen_shm.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/xennet_checksum.h>

#include <uvm/uvm.h>

/*
 * Backend network device driver for Xen.
 *
 * One xnetback_instance is created per "vif" entry the toolstack writes
 * into xenstore; it appears to the host as an ethernet pseudo-interface
 * (xvif<domid>i<handle>).  Frontend TX requests are received in
 * xennetback_evthandler() and injected into the host network stack;
 * host output packets are copied into frontend RX buffers in
 * xennetback_ifsoftstart_copy().  All data transfer uses the
 * grant-table "copy" interface (rx-copy mode only).
 */

#ifdef XENDEBUG_NET
#define XENPRINTF(x) printf x
#else
#define XENPRINTF(x)
#endif

extern pt_entry_t xpmap_pg_nx;

/* Number of slots in the shared TX/RX rings (one page each). */
#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

/* linux wants at least 16 bytes free in front of the packet */
#define LINUX_REQUESTED_OFFSET 16

/* ratecheck(9) interval for pool/mbuf allocation failures */
static const struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */

/* state of a xnetback instance */
typedef enum {
	CONNECTED,
	DISCONNECTING,
	DISCONNECTED
} xnetback_state_t;

/* we keep the xnetback instances in a linked list */
struct xnetback_instance {
	SLIST_ENTRY(xnetback_instance) next;
	struct xenbus_device *xni_xbusd; /* our xenstore entry */
	domid_t xni_domid;		/* attached to this domain */
	uint32_t xni_handle;	/* domain-specific handle */
	xnetback_state_t xni_status;

	/* network interface stuff */
	struct ethercom xni_ec;
	struct callout xni_restart;
	uint8_t xni_enaddr[ETHER_ADDR_LEN];

	/* remote domain communication stuff */
	unsigned int xni_evtchn; /* our event channel */
	struct intrhand *xni_ih;
	netif_tx_back_ring_t xni_txring;
	netif_rx_back_ring_t xni_rxring;
	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
	grant_handle_t xni_rx_ring_handle;
	vaddr_t xni_tx_ring_va; /* to unmap the ring */
	vaddr_t xni_rx_ring_va;
};
#define xni_if		xni_ec.ec_if
#define xni_bpf		xni_if.if_bpf

void xvifattach(int);
static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
static void xennetback_ifstart(struct ifnet *);
static void xennetback_ifsoftstart_copy(struct xnetback_instance *);
static void xennetback_ifwatchdog(struct ifnet *);
static int  xennetback_ifinit(struct ifnet *);
static void xennetback_ifstop(struct ifnet *, int);

static int  xennetback_xenbus_create(struct xenbus_device *);
static int  xennetback_xenbus_destroy(void *);
static void xennetback_frontend_changed(void *, XenbusState);

static inline void xennetback_tx_response(struct xnetback_instance *,
    int, int);
static void xennetback_mbuf_addr(struct mbuf *, paddr_t *, int *);

/* All live instances, protected by xnetback_lock. */
static SLIST_HEAD(, xnetback_instance) xnetback_instances;
static kmutex_t xnetback_lock;

static bool xnetif_lookup(domid_t, uint32_t);
static int  xennetback_evthandler(void *);

static struct xenbus_backend_driver xvif_backend_driver = {
	.xbakd_create = xennetback_xenbus_create,
	.xbakd_type = "vif"
};

/*
 * Number of packets to transmit in one hypercall (= number of pages to
 * transmit at once).
 */
#define NB_XMIT_PAGES_BATCH 64

/* arrays used in xennetback_ifstart(), too large to allocate on stack */
/* XXXSMP */
static gnttab_copy_t	xstart_gop_copy[NB_XMIT_PAGES_BATCH];
static struct mbuf	*mbufs_sent[NB_XMIT_PAGES_BATCH];
static struct _req_info {
	int id;		/* frontend request id, echoed in the response */
	int flags;	/* NETTXF_* flags from the frontend request */
} xstart_req[NB_XMIT_PAGES_BATCH];


/*
 * Module attach: initialize the instance list/lock and register
 * ourselves as the xenbus backend for "vif" devices.
 */
void
xvifattach(int n)
{
	XENPRINTF(("xennetback_init\n"));

	SLIST_INIT(&xnetback_instances);
	mutex_init(&xnetback_lock, MUTEX_DEFAULT, IPL_NONE);

	xenbus_backend_register(&xvif_backend_driver);
}

/*
 * Create a backend instance for a new xenstore "vif" entry: read
 * frontend-id/handle/mac from xenstore, attach the pseudo ethernet
 * interface, advertise our features to the frontend and move the
 * device to InitWait.
 */
static int
xennetback_xenbus_create(struct xenbus_device *xbusd)
{
	struct xnetback_instance *xneti;
	long domid, handle;
	struct ifnet *ifp;
	extern int ifqmaxlen; /* XXX */
	char *val, *e, *p;
	int i, err;
	struct xenbus_transaction *xbt;

	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "frontend-id", &domid, 10)) != 0) {
		aprint_error("xvif: can't read %s/frontend-id: %d\n",
		    xbusd->xbusd_path, err);
		return err;
	}
	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "handle", &handle, 10)) != 0) {
		aprint_error("xvif: can't read %s/handle: %d\n",
		    xbusd->xbusd_path, err);
		return err;
	}

	/* refuse duplicate (domid, handle) pairs */
	if (xnetif_lookup(domid, handle)) {
		return EEXIST;
	}
	xneti = kmem_zalloc(sizeof(*xneti), KM_SLEEP);
	xneti->xni_domid = domid;
	xneti->xni_handle = handle;
	xneti->xni_status = DISCONNECTED;

	xbusd->xbusd_u.b.b_cookie = xneti;
	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
	xneti->xni_xbusd = xbusd;

	ifp = &xneti->xni_if;
	ifp->if_softc = xneti;
	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
	    (int)domid, (int)handle);

	/* read mac address */
	if ((err = xenbus_read(NULL, xbusd->xbusd_path, "mac", NULL, &val))) {
		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
		    xbusd->xbusd_path, err);
		goto fail;
	}
	/*
	 * Parse "xx:xx:xx:xx:xx:xx".
	 * NOTE(review): in this condition the "&& e[0] != ':'" subclause is
	 * dead (when e[0] == '\0' it is always true), so only a premature
	 * end of string is detected and a bogus separator (e.g. '-') is
	 * accepted — confirm whether stricter validation was intended.
	 */
	for (i = 0, p = val; i < 6; i++) {
		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
			aprint_error_ifnet(ifp,
			    "%s is not a valid mac address\n", val);
			free(val, M_DEVBUF);
			err = EINVAL;
			goto fail;
		}
		p = &e[1];
	}
	free(val, M_DEVBUF);

	/* we can't use the same MAC addr as our guest */
	xneti->xni_enaddr[3]++;
	/* create pseudo-interface */
	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
	    ether_sprintf(xneti->xni_enaddr));
	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	/* deep send queue so a full TX ring doesn't immediately drop */
	ifp->if_snd.ifq_maxlen =
	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
	ifp->if_capabilities =
		IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx
		| IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
		| IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
		| IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx
		| IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx;
/* csum_flags bits this driver can pass through to/from the frontend */
#define XN_M_CSUM_SUPPORTED (		\
	M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_IPv4	\
	| M_CSUM_TCPv6 | M_CSUM_UDPv6	\
    )
	ifp->if_ioctl = xennetback_ifioctl;
	ifp->if_start = xennetback_ifstart;
	ifp->if_watchdog = xennetback_ifwatchdog;
	ifp->if_init = xennetback_ifinit;
	ifp->if_stop = xennetback_ifstop;
	ifp->if_timer = 0;
	IFQ_SET_READY(&ifp->if_snd);
	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);

	mutex_enter(&xnetback_lock);
	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
	mutex_exit(&xnetback_lock);

	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;

	/* advertise vifname and features; retry the transaction on EAGAIN */
	do {
		xbt = xenbus_transaction_start();
		if (xbt == NULL) {
			aprint_error_ifnet(ifp,
			    "%s: can't start transaction\n",
			    xbusd->xbusd_path);
			goto fail;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "vifname", "%s", ifp->if_xname);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/vifname: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-rx-copy", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-rx-copy: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-ipv6-csum-offload", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-ipv6-csum-offload: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
	if (err) {
		aprint_error_ifnet(ifp,
		    "%s: can't end transaction: %d\n",
		    xbusd->xbusd_path, err);
	}

	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
	if (err) {
		aprint_error_ifnet(ifp,
		    "failed to switch state on %s: %d\n",
		    xbusd->xbusd_path, err);
		goto fail;
	}
	return 0;

abort_xbt:
	xenbus_transaction_end(xbt, 1);
fail:
	/*
	 * NOTE(review): reaching here after if_attach()/ether_ifattach()
	 * and SLIST_INSERT_HEAD() frees xneti while it is still attached
	 * and still on xnetback_instances — looks like those paths need
	 * a detach/removal before the free; confirm against later
	 * upstream revisions.
	 */
	kmem_free(xneti, sizeof(*xneti));
	return err;
}

/*
 * Backend detach (xenbus b_detach hook): tear the interface down,
 * unmap the shared rings and free the instance.
 * NOTE(review): declared static in the forward declaration above but
 * defined without static — legal C (linkage stays internal) but
 * inconsistent.
 */
int
xennetback_xenbus_destroy(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct gnttab_unmap_grant_ref op;
	int err;

	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");

	/* quiesce the event channel before anything else */
	if (xneti->xni_ih != NULL) {
		hypervisor_mask_event(xneti->xni_evtchn);
		xen_intr_disestablish(xneti->xni_ih);
		xneti->xni_ih = NULL;
	}

	mutex_enter(&xnetback_lock);
	SLIST_REMOVE(&xnetback_instances,
	    xneti, xnetback_instance, next);
	mutex_exit(&xnetback_lock);

	ether_ifdetach(&xneti->xni_if);
	if_detach(&xneti->xni_if);

	/* unmap the grant-mapped ring pages, if they were mapped */
	if (xneti->xni_txring.sring) {
		op.host_addr = xneti->xni_tx_ring_va;
		op.handle = xneti->xni_tx_ring_handle;
		op.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &op, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}
	if (xneti->xni_rxring.sring) {
		op.host_addr = xneti->xni_rx_ring_va;
		op.handle = xneti->xni_rx_ring_handle;
		op.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &op, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}
	/* release the VA space reserved for the rings */
	if (xneti->xni_tx_ring_va != 0) {
		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
		xneti->xni_tx_ring_va = 0;
	}
	if (xneti->xni_rx_ring_va != 0) {
		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
		xneti->xni_rx_ring_va = 0;
	}
	kmem_free(xneti, sizeof(*xneti));
	return 0;
}

/*
 * Establish the shared-memory/event-channel connection with the
 * frontend: read ring refs and event channel from xenstore, map both
 * ring pages, bind the interdomain event channel and hook up the
 * interrupt handler.  Returns 0 on success, -1 on failure (with all
 * partially-acquired resources released).
 */
static int
xennetback_connect(struct xnetback_instance *xneti)
{
	int err;
	netif_tx_sring_t *tx_ring;
	netif_rx_sring_t *rx_ring;
	struct gnttab_map_grant_ref op;
	struct gnttab_unmap_grant_ref uop;
	evtchn_op_t evop;
	u_long tx_ring_ref, rx_ring_ref;
	u_long revtchn, rx_copy;
	struct xenbus_device *xbusd = xneti->xni_xbusd;

	/* read communication information */
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "tx-ring-ref", &tx_ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "rx-ring-ref", &rx_ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "event-channel", &revtchn, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
		    xbusd->xbusd_otherend);
		return -1;
	}
	/* this backend only supports rx-copy mode; insist on it */
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "request-rx-copy", &rx_copy, 10);
	if (err == ENOENT || !rx_copy) {
		xenbus_dev_fatal(xbusd, err,
		    "%s/request-rx-copy not supported by frontend",
		    xbusd->xbusd_otherend);
		return -1;
	} else if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
		    xbusd->xbusd_otherend);
		return -1;
	}

	/* allocate VA space and map rings */
	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xneti->xni_tx_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for TX ring", xbusd->xbusd_otherend);
		goto err1;
	}
	tx_ring = (void *)xneti->xni_tx_ring_va;

	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xneti->xni_rx_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for RX ring", xbusd->xbusd_otherend);
		goto err1;
	}
	rx_ring = (void *)xneti->xni_rx_ring_va;

	/* map the frontend's TX ring page into our VA */
	op.host_addr = xneti->xni_tx_ring_va;
	op.flags = GNTMAP_host_map;
	op.ref = tx_ring_ref;
	op.dom = xneti->xni_domid;
	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (err || op.status) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't map TX grant ref: err %d status %d\n",
		    err, op.status);
		goto err2;
	}
	xneti->xni_tx_ring_handle = op.handle;
	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);

	/* same for the RX ring page */
	op.host_addr = xneti->xni_rx_ring_va;
	op.flags = GNTMAP_host_map;
	op.ref = rx_ring_ref;
	op.dom = xneti->xni_domid;
	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (err || op.status) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't map RX grant ref: err %d status %d\n",
		    err, op.status);
		goto err2;
	}
	xneti->xni_rx_ring_handle = op.handle;
	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);

	/* bind our local event channel to the frontend's */
	evop.cmd = EVTCHNOP_bind_interdomain;
	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
	evop.u.bind_interdomain.remote_port = revtchn;
	err = HYPERVISOR_event_channel_op(&evop);
	if (err) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't get event channel: %d\n", err);
		goto err2;
	}
	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
	/* make ring setup visible before flipping status to CONNECTED */
	xen_wmb();
	xneti->xni_status = CONNECTED;
	xen_wmb();

	xneti->xni_ih = xen_intr_establish_xname(-1, &xen_pic, xneti->xni_evtchn,
	    IST_LEVEL, IPL_NET, xennetback_evthandler, xneti, false,
	    xneti->xni_if.if_xname);
	KASSERT(xneti->xni_ih != NULL);
	xennetback_ifinit(&xneti->xni_if);
	hypervisor_unmask_event(xneti->xni_evtchn);
	/* kick the frontend so it processes anything already queued */
	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
	return 0;

err2:
	/* unmap rings */
	if (xneti->xni_tx_ring_handle != 0) {
		uop.host_addr = xneti->xni_tx_ring_va;
		uop.handle = xneti->xni_tx_ring_handle;
		uop.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &uop, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}

	if (xneti->xni_rx_ring_handle != 0) {
		uop.host_addr = xneti->xni_rx_ring_va;
		uop.handle = xneti->xni_rx_ring_handle;
		uop.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &uop, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}

err1:
	/* free rings VA space */
	if (xneti->xni_rx_ring_va != 0)
		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);

	if (xneti->xni_tx_ring_va != 0)
		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);

	return -1;

}

/*
 * Xenbus otherend-state callback: react to frontend state transitions
 * (connect when it reaches Connected, wind down on Closing).
 */
static void
xennetback_frontend_changed(void *arg, XenbusState new_state)
{
	struct xnetback_instance *xneti = arg;
	struct xenbus_device *xbusd = xneti->xni_xbusd;

	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
	switch(new_state) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
		break;

	case XenbusStateConnected:
		if (xneti->xni_status == CONNECTED)
			break;	/* already connected, nothing to do */
		if (xennetback_connect(xneti) == 0)
			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		xneti->xni_status = DISCONNECTING;
		xneti->xni_if.if_flags &= ~IFF_RUNNING;
		xneti->xni_if.if_timer = 0;
		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		/* otherend_changed() should handle it for us */
		panic("xennetback_frontend_changed: closed\n");
	case XenbusStateUnknown:
	case XenbusStateInitWait:
	default:
		aprint_error("%s: invalid frontend state %d\n",
		    xneti->xni_if.if_xname, new_state);
		break;
	}
	return;

}

/* lookup a xneti based on domain id and interface handle */
static bool
xnetif_lookup(domid_t dom , uint32_t handle)
{
	struct xnetback_instance *xneti;
	bool found = false;

	mutex_enter(&xnetback_lock);
	SLIST_FOREACH(xneti, &xnetback_instances, next) {
		if (xneti->xni_domid == dom && xneti->xni_handle == handle) {
			found = true;
			break;
		}
	}
	mutex_exit(&xnetback_lock);

	return found;
}

/*
 * Push one TX response (id/status) onto the shared ring and notify
 * the frontend via the event channel if the ring macros say so.
 */
static inline void
xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
{
	RING_IDX resp_prod;
	netif_tx_response_t *txresp;
	int do_event;

	resp_prod = xneti->xni_txring.rsp_prod_pvt;
	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);

	txresp->id = id;
	txresp->status = status;
	xneti->xni_txring.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
	if (do_event) {
		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
	}
}

/*
 * Sanity-check a frontend TX request.  Returns NULL if acceptable,
 * otherwise a short reason string for logging.  The data must fit in
 * a single page and within an ethernet frame (plus VLAN tag when the
 * interface negotiated ETHERCAP_VLAN_MTU).
 */
static inline const char *
xennetback_tx_check_packet(const netif_tx_request_t *txreq, int vlan)
{
	if (__predict_false(txreq->size < ETHER_HDR_LEN))
		return "too small";

	if (__predict_false(txreq->offset + txreq->size > PAGE_SIZE))
		return "crossing a page boundary";

	int maxlen = ETHER_MAX_LEN - ETHER_CRC_LEN;
	if (vlan)
		maxlen += ETHER_VLAN_ENCAP_LEN;
	if (__predict_false(txreq->size > maxlen))
		return "too big";

	/* Somewhat duplicit, MCLBYTES is > ETHER_MAX_LEN */
	if (__predict_false(txreq->size > MCLBYTES))
		return "bigger than MCLBYTES";

	return NULL;
}

/*
 * Execute one batch of queued GNTTABOP_copy operations (frontend ->
 * backend), ack each request on the TX ring and hand the resulting
 * mbufs to the network stack.  On a copy failure the remaining mbufs
 * of the batch are dropped and NETIF_RSP_ERROR is returned for them.
 * Uses the shared xstart_gop_copy/mbufs_sent/xstart_req arrays filled
 * by xennetback_evthandler().
 */
static void
xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
	int queued)
{
	int i = 0;
	gnttab_copy_t *gop;
	struct mbuf *m;
	struct _req_info *req;

	/*
	 * Copy the data and ack it. Delaying it until the mbuf is
	 * freed will stall transmit.
	 */
	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xstart_gop_copy, queued)
	    != 0) {
		printf("%s: GNTTABOP_copy failed", ifp->if_xname);
		goto abort;
	}

	for (; i < queued; i++) {
		gop = &xstart_gop_copy[i];
		m = mbufs_sent[i];
		req = &xstart_req[i];

		if (gop->status != GNTST_okay) {
			printf("%s GNTTABOP_copy[%d] %d\n",
			    ifp->if_xname, i, gop->status);
			goto abort;
		}

		xennetback_tx_response(xneti, req->id, NETIF_RSP_OKAY);

		/* if not promiscuous, drop packets not addressed to us */
		if ((ifp->if_flags & IFF_PROMISC) == 0) {
			struct ether_header *eh =
			    mtod(m, struct ether_header *);
			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
			    ETHER_ADDR_LEN) != 0) {
				m_freem(m);
				continue; /* packet is not for us */
			}
		}

		/* honor the frontend's checksum-offload flags */
		if (req->flags & NETTXF_csum_blank)
			xennet_checksum_fill(ifp, m);
		else if (req->flags & NETTXF_data_validated)
			m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED;
		m_set_rcvif(m, ifp);

		if_percpuq_enqueue(ifp->if_percpuq, m);
	}

	return;

abort:
	/* fail the rest of the batch: free mbufs, error the requests */
	for (; i < queued; i++) {
		m = mbufs_sent[i];
		req = &xstart_req[i];

		m_freem(m);
		xennetback_tx_response(xneti, req->id, NETIF_RSP_ERROR);
		if_statinc(ifp, if_ierrors);
	}
}

/*
 * Event-channel interrupt handler: consume TX requests from the
 * frontend, validate them, allocate mbufs and queue grant-copy
 * operations, flushing a batch via xennetback_tx_copy_process()
 * whenever NB_XMIT_PAGES_BATCH requests are pending.  Always
 * returns 1 (interrupt handled).
 */
static int
xennetback_evthandler(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct ifnet *ifp = &xneti->xni_if;
	netif_tx_request_t txreq;
	struct mbuf *m;
	int receive_pending;
	RING_IDX req_cons;
	gnttab_copy_t *gop;
	paddr_t pa;
	int offset, queued = 0;

	XENPRINTF(("xennetback_evthandler "));
	req_cons = xneti->xni_txring.req_cons;
	xen_rmb();
	while (1) {
		xen_rmb(); /* be sure to read the request before updating */
		xneti->xni_txring.req_cons = req_cons;
		xen_wmb();
		RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring,
		    receive_pending);
		if (receive_pending == 0)
			break;
		/* copy the request out of the shared ring before using it */
		RING_COPY_REQUEST(&xneti->xni_txring, req_cons, &txreq);
		xen_rmb();
		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
		    txreq.size));
		req_cons++;
		if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
		    (IFF_UP | IFF_RUNNING))) {
			/* interface not up, drop */
			xennetback_tx_response(xneti, txreq.id,
			    NETIF_RSP_DROPPED);
			continue;
		}

		/*
		 * Do some sanity checks, and map the packet's page.
		 */
		const char *msg = xennetback_tx_check_packet(&txreq,
		    xneti->xni_ec.ec_capenable & ETHERCAP_VLAN_MTU);
		if (msg) {
			printf("%s: packet with size %d is %s\n",
			    ifp->if_xname, txreq.size, msg);
			xennetback_tx_response(xneti, txreq.id,
			    NETIF_RSP_ERROR);
			if_statinc(ifp, if_ierrors);
			continue;
		}

		/* get a mbuf for this packet */
		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (__predict_false(m == NULL)) {
			static struct timeval lasttime;
			if (ratecheck(&lasttime, &xni_pool_errintvl))
				printf("%s: mbuf alloc failed\n",
				    ifp->if_xname);
			xennetback_tx_response(xneti, txreq.id,
			    NETIF_RSP_DROPPED);
			if_statinc(ifp, if_ierrors);
			continue;
		}
		/* need cluster storage when the payload exceeds MHLEN */
		if (txreq.size > MHLEN) {
			MCLGET(m, M_DONTWAIT);
			if (__predict_false(m->m_ext_storage.ext_buf == NULL)) {
				m_freem(m);
				xennetback_tx_response(xneti, txreq.id,
				    NETIF_RSP_DROPPED);
				if_statinc(ifp, if_ierrors);
				continue;
			}
		}

		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
		    xneti->xni_if.if_xname, txreq.offset,
		    txreq.size, txreq.id, MASK_NETIF_TX_IDX(req_cons)));

		xennetback_mbuf_addr(m, &pa, &offset);

		/* Queue for the copy */
		gop = &xstart_gop_copy[queued];
		memset(gop, 0, sizeof(*gop));
		gop->flags = GNTCOPY_source_gref;
		gop->len = txreq.size;

		/* source: the frontend's granted page */
		gop->source.u.ref = txreq.gref;
		gop->source.offset = txreq.offset;
		gop->source.domid = xneti->xni_domid;

		/* destination: our mbuf's data area (machine frame) */
		gop->dest.offset = offset;
		gop->dest.domid = DOMID_SELF;
		gop->dest.u.gmfn = xpmap_ptom(pa) >> PAGE_SHIFT;

		m->m_len = m->m_pkthdr.len = txreq.size;
		mbufs_sent[queued] = m;

		xstart_req[queued].id = txreq.id;
		xstart_req[queued].flags = txreq.flags;

		queued++;

		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
		if (queued == NB_XMIT_PAGES_BATCH) {
			xennetback_tx_copy_process(ifp, xneti, queued);
			queued = 0;
		}
	}
	/* flush any partial batch */
	if (queued > 0)
		xennetback_tx_copy_process(ifp, xneti, queued);
	xen_rmb(); /* be sure to read the request before updating pointer */
	xneti->xni_txring.req_cons = req_cons;
	xen_wmb();

	/* check to see if we can transmit more packets */
	if_schedule_deferred_start(ifp);

	return 1;
}

/*
 * Interface ioctl: defer everything to ether_ioctl(); an ENETRESET
 * (media/flags change) needs no reprogramming here, so report success.
 */
static int
xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	//struct xnetback_instance *xneti = ifp->if_softc;
	//struct ifreq *ifr = (struct ifreq *)data;
	int s, error;

	s = splnet();
	error = ether_ioctl(ifp, cmd, data);
	if (error == ENETRESET)
		error = 0;
	splx(s);
	return error;
}

/* if_start hook: drain the send queue towards the frontend. */
static void
xennetback_ifstart(struct ifnet *ifp)
{
	struct xnetback_instance *xneti = ifp->if_softc;

	/*
	 * The Xen communication channel is much more efficient if we can
	 * schedule batch of packets for the domain. Deferred start by network
	 * stack will enqueue all pending mbufs in the interface's send queue
	 * before it is processed by the soft interrupt handler.
	 */
	xennetback_ifsoftstart_copy(xneti);
}

/*
 * Slightly different from m_dup(); for some reason m_dup() can return
 * a chain where the data area can cross a page boundary.
 * This doesn't happen with the function below.
 */
static struct mbuf *
xennetback_copymbuf(struct mbuf *m)
{
	struct mbuf *new_m;

	MGETHDR(new_m, M_DONTWAIT, MT_DATA);
	if (__predict_false(new_m == NULL)) {
		return NULL;
	}
	if (m->m_pkthdr.len > MHLEN) {
		MCLGET(new_m, M_DONTWAIT);
		if (__predict_false(
		    (new_m->m_flags & M_EXT) == 0)) {
			m_freem(new_m);
			return NULL;
		}
	}
	/* flatten the whole chain into the single new mbuf */
	m_copydata(m, 0, m->m_pkthdr.len,
	    mtod(new_m, void *));
	new_m->m_len = new_m->m_pkthdr.len =
	    m->m_pkthdr.len;

	/*
	 * Need to retain csum flags to know if csum was actually computed.
	 * This is used to set NETRXF_csum_blank/NETRXF_data_validated.
	 */
	new_m->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;

	return new_m;
}

/* return physical page address and offset of data area of an mbuf */
static void
xennetback_mbuf_addr(struct mbuf *m, paddr_t *xmit_pa, int *offset)
{
	switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
	case M_EXT|M_EXT_CLUSTER:
		/* cluster storage: paddr cached in the ext descriptor */
		KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
		*xmit_pa = m->m_ext.ext_paddr;
		*offset = m->m_data - m->m_ext.ext_buf;
		break;
	case 0:
		/* internal storage: paddr of the mbuf itself */
		KASSERT(m->m_paddr != M_PADDR_INVALID);
		*xmit_pa = m->m_paddr;
		*offset = M_BUFOFFSET(m) +
		    (m->m_data - M_BUFADDR(m));
		break;
	default:
		/* other external storage: resolve through the pmap */
		if (__predict_false(
		    !pmap_extract(pmap_kernel(),
		    (vaddr_t)m->m_data, xmit_pa))) {
			panic("xennet_start: no pa");
		}
		*offset = 0;
		break;
	}
	/* split into page frame and in-page offset */
	*offset += (*xmit_pa & ~PTE_FRAME);
	*xmit_pa = (*xmit_pa & PTE_FRAME);
}

/*
 * Copy-mode transmit towards the frontend: for each mbuf on the send
 * queue, fill a GNTCOPY_dest_gref operation copying our data into the
 * grant the frontend provided in its RX request, batch up to
 * NB_XMIT_PAGES_BATCH copies per hypercall, then push RX responses
 * and notify.  Packets whose data is fragmented or crosses a page
 * boundary are first flattened with xennetback_copymbuf().
 */
static void
xennetback_ifsoftstart_copy(struct xnetback_instance *xneti)
{
	struct ifnet *ifp = &xneti->xni_if;
	struct mbuf *m, *new_m;
	paddr_t xmit_pa;
	paddr_t xmit_ma;
	int i, j;
	netif_rx_response_t *rxresp;
	netif_rx_request_t rxreq;
	RING_IDX req_prod, resp_prod;
	int do_event = 0;
	gnttab_copy_t *gop;
	int id, offset;
	bool abort;

	XENPRINTF(("xennetback_ifsoftstart_copy "));
	int s = splnet();
	if (__predict_false((ifp->if_flags & IFF_RUNNING) == 0)) {
		splx(s);
		return;
	}

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		XENPRINTF(("pkt\n"));
		req_prod = xneti->xni_rxring.sring->req_prod;
		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
		xen_rmb();

		gop = xstart_gop_copy;
		abort = false;
		/* fill one batch of copy ops from the send queue */
		for (i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
			XENPRINTF(("have a packet\n"));
			IFQ_POLL(&ifp->if_snd, m);
			if (__predict_false(m == NULL))
				panic("xennetback_ifstart: IFQ_POLL");
			if (__predict_false(
			    req_prod == xneti->xni_rxring.req_cons ||
			    xneti->xni_rxring.req_cons - resp_prod ==
			    NET_RX_RING_SIZE)) {
				/* out of ring space */
				XENPRINTF(("xennetback_ifstart: ring full "
				    "req_prod 0x%x req_cons 0x%x resp_prod "
				    "0x%x\n",
				    req_prod, xneti->xni_rxring.req_cons,
				    resp_prod));
				abort = true;
				break;
			}
			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
				break; /* we filled the array */

			/*
			 * The copy source must be physically contiguous
			 * within one page; flatten the mbuf if not.
			 */
			xennetback_mbuf_addr(m, &xmit_pa, &offset);
			if (m->m_pkthdr.len != m->m_len ||
			    (offset + m->m_pkthdr.len) > PAGE_SIZE) {
				new_m = xennetback_copymbuf(m);
				if (__predict_false(new_m == NULL)) {
					static struct timeval lasttime;
					if (ratecheck(&lasttime, &xni_pool_errintvl))
						printf("%s: cannot allocate new mbuf\n",
						    ifp->if_xname);
					abort = 1;
					break;
				} else {
					IFQ_DEQUEUE(&ifp->if_snd, m);
					m_freem(m);
					m = new_m;
					xennetback_mbuf_addr(m,
					    &xmit_pa, &offset);
				}
			} else {
				IFQ_DEQUEUE(&ifp->if_snd, m);
			}

			KASSERT(xmit_pa != POOL_PADDR_INVALID);
			KASSERT((offset + m->m_pkthdr.len) <= PAGE_SIZE);
			xmit_ma = xpmap_ptom(xmit_pa);
			/* start filling ring */
			gop->flags = GNTCOPY_dest_gref;
			gop->source.offset = offset;
			gop->source.domid = DOMID_SELF;
			gop->source.u.gmfn = xmit_ma >> PAGE_SHIFT;

			/* destination: the grant from the frontend's RX req */
			RING_COPY_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons, &rxreq);
			gop->dest.u.ref = rxreq.gref;
			gop->dest.offset = 0;
			gop->dest.domid = xneti->xni_domid;

			gop->len = m->m_pkthdr.len;
			gop++;

			id = rxreq.id;
			xen_rmb();
			xneti->xni_rxring.req_cons++;
			/* pre-fill the matching RX response */
			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
			    resp_prod);
			rxresp->id = id;
			rxresp->offset = 0;
			rxresp->status = m->m_pkthdr.len;
			if ((m->m_pkthdr.csum_flags &
			    XN_M_CSUM_SUPPORTED) != 0) {
				rxresp->flags = NETRXF_csum_blank;
			} else {
				rxresp->flags = NETRXF_data_validated;
			}

			mbufs_sent[i] = m;
			resp_prod++;
			i++; /* this packet has been queued */
			if_statinc(ifp, if_opackets);
			bpf_mtap(ifp, m, BPF_D_OUT);
		}
		if (i != 0) {
			/* one hypercall for the whole batch */
			if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
			    xstart_gop_copy, i) != 0) {
				panic("%s: GNTTABOP_copy failed",
				    ifp->if_xname);
			}

			/* turn any failed copy into an error response */
			for (j = 0; j < i; j++) {
				if (xstart_gop_copy[j].status != GNTST_okay) {
					printf("%s GNTTABOP_copy[%d] %d\n",
					    ifp->if_xname,
					    j, xstart_gop_copy[j].status);
					printf("%s: req_prod %u req_cons "
					    "%u rsp_prod %u rsp_prod_pvt %u "
					    "i %d\n",
					    ifp->if_xname,
					    xneti->xni_rxring.sring->req_prod,
					    xneti->xni_rxring.req_cons,
					    xneti->xni_rxring.sring->rsp_prod,
					    xneti->xni_rxring.rsp_prod_pvt,
					    i);
					rxresp = RING_GET_RESPONSE(
					    &xneti->xni_rxring,
					    xneti->xni_rxring.rsp_prod_pvt + j);
					rxresp->status = NETIF_RSP_ERROR;
				}
			}

			/* update pointer */
			KASSERT(
			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
			    &xneti->xni_rxring, j);
			if (j)
				do_event = 1;
			/* now we can free the mbufs */
			for (j = 0; j < i; j++) {
				m_freem(mbufs_sent[j]);
			}
		}
		/* send event */
		if (do_event) {
			xen_rmb();
			XENPRINTF(("%s receive event\n",
			    xneti->xni_if.if_xname));
			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
			do_event = 0;
		}
		/*
		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
		 * here, as the frontend doesn't notify when adding
		 * requests anyway
		 */
		if (__predict_false(abort ||
		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
			/* ring full */
			ifp->if_timer = 1;
			break;
		}
	}
	splx(s);
}

static void
xennetback_ifwatchdog(struct ifnet * ifp)
{
	/*
	 * We can get to the following condition: transmit stalls because the
	 * ring is full when the ifq is full too.
	 *
	 * In this case (as, unfortunately, we don't get an interrupt from xen
	 * on transmit) nothing will ever call xennetback_ifstart() again.
	 * Here we abuse the watchdog to get out of this condition.
	 */
	XENPRINTF(("xennetback_ifwatchdog\n"));
	xennetback_ifstart(ifp);
}

/*
 * if_init hook: mark the interface RUNNING, but only once the
 * frontend connection is established.
 */
static int
xennetback_ifinit(struct ifnet *ifp)
{
	struct xnetback_instance *xneti = ifp->if_softc;
	int s = splnet();

	if ((ifp->if_flags & IFF_UP) == 0) {
		splx(s);
		return 0;
	}
	if (xneti->xni_status == CONNECTED)
		ifp->if_flags |= IFF_RUNNING;
	splx(s);
	return 0;
}

/*
 * if_stop hook: clear RUNNING and drain any TX requests the frontend
 * already posted so they get answered (dropped) rather than stall.
 */
static void
xennetback_ifstop(struct ifnet *ifp, int disable)
{
	struct xnetback_instance *xneti = ifp->if_softc;
	int s = splnet();

	ifp->if_flags &= ~IFF_RUNNING;
	ifp->if_timer = 0;
	if (xneti->xni_status == CONNECTED) {
		/*
		 * NOTE(review): xni_txring is a struct, not a pointer, so
		 * the "->" accesses below cannot compile; this only builds
		 * because XENPRINTF() expands to nothing unless
		 * XENDEBUG_NET is defined.  Should be
		 * xni_txring.sring->req_prod etc. — confirm before enabling
		 * the debug option.
		 */
		XENPRINTF(("%s: req_prod 0x%x resp_prod 0x%x req_cons 0x%x "
		    "event 0x%x\n", ifp->if_xname, xneti->xni_txring->req_prod,
		    xneti->xni_txring->resp_prod, xneti->xni_txring->req_cons,
		    xneti->xni_txring->event));
		xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
	}
	splx(s);
}