/* $NetBSD: xennetback_xenbus.c,v 1.38 2011/01/18 21:34:31 jym Exp $ */

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "opt_xen.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/device.h>
#include <sys/intr.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <net/if_ether.h>

#include <xen/xen.h>
#include <xen/xen_shm.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/xennet_checksum.h>

#include <uvm/uvm.h>

#ifdef XENDEBUG_NET
#define XENPRINTF(x) printf x
#else
#define XENPRINTF(x)
#endif

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

/* linux wants at least 16 bytes free in front of the packet */
#define LINUX_REQUESTED_OFFSET 16

/* descriptor of a packet being handled by the kernel */
struct xni_pkt {
	int pkt_id; /* packet's ID */
	grant_handle_t pkt_handle;
	struct xnetback_instance *pkt_xneti; /* pointer back to our softc */
};

static inline void xni_pkt_unmap(struct xni_pkt *, vaddr_t);

/* pool for xni_pkt */
struct pool xni_pkt_pool;
/* ratecheck(9) for pool allocation failures */
struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */
/*
 * Backend network device driver for Xen
 */

/* state of a xnetback instance */
typedef enum {CONNECTED, DISCONNECTING, DISCONNECTED} xnetback_state_t;
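
/*
 * Lifecycle of an instance: it is created by xennetback_xenbus_create()
 * when a "vif" backend node shows up in the xenstore, goes CONNECTED in
 * xennetback_frontend_changed() once the shared rings and the event
 * channel are mapped, and is torn down by xennetback_xenbus_destroy().
 */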
/* we keep the xnetback instances in a linked list */
struct xnetback_instance {
	SLIST_ENTRY(xnetback_instance) next;
	struct xenbus_device *xni_xbusd; /* our xenstore entry */
	domid_t xni_domid;		/* attached to this domain */
	uint32_t xni_handle;	/* domain-specific handle */
	xnetback_state_t xni_status;
	void *xni_softintr;

	/* network interface stuff */
	struct ethercom xni_ec;
	struct callout xni_restart;
	uint8_t xni_enaddr[ETHER_ADDR_LEN];

	/* remote domain communication stuff */
	unsigned int xni_evtchn; /* our event channel */
	netif_tx_back_ring_t xni_txring;
	netif_rx_back_ring_t xni_rxring;
	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
	grant_handle_t xni_rx_ring_handle;
	vaddr_t xni_tx_ring_va; /* to unmap the ring */
	vaddr_t xni_rx_ring_va;
};
#define xni_if    xni_ec.ec_if
#define xni_bpf   xni_if.if_bpf

void xvifattach(int);
static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
static void xennetback_ifstart(struct ifnet *);
static void xennetback_ifsoftstart_transfer(void *);
static void xennetback_ifsoftstart_copy(void *);
static void xennetback_ifwatchdog(struct ifnet *);
static int  xennetback_ifinit(struct ifnet *);
static void xennetback_ifstop(struct ifnet *, int);

static int  xennetback_xenbus_create(struct xenbus_device *);
static int  xennetback_xenbus_destroy(void *);
static void xennetback_frontend_changed(void *, XenbusState);

static inline void xennetback_tx_response(struct xnetback_instance *,
    int, int);
static void xennetback_tx_free(struct mbuf * , void *, size_t, void *);

SLIST_HEAD(, xnetback_instance) xnetback_instances;

static struct xnetback_instance *xnetif_lookup(domid_t, uint32_t);
static int  xennetback_evthandler(void *);

static struct xenbus_backend_driver xvif_backend_driver = {
	.xbakd_create = xennetback_xenbus_create,
	.xbakd_type = "vif"
};

/*
 * Number of packets to transmit in one hypercall (= number of pages to
 * transmit at once).
 */
#define NB_XMIT_PAGES_BATCH 64
/*
 * We will transfer a mapped page to the remote domain, and remap another
 * page in its place immediately. For this we keep a list of pages available.
 * When the list is empty, we ask the hypervisor to give us
 * NB_XMIT_PAGES_BATCH pages back.
 */
static unsigned long mcl_pages[NB_XMIT_PAGES_BATCH]; /* our physical pages */
int mcl_pages_alloc; /* current index in mcl_pages */
static int  xennetback_get_mcl_page(paddr_t *);
static void xennetback_get_new_mcl_pages(void);
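
/*
 * A minimal sketch of the intended get/refill pattern; see
 * xennetback_ifsoftstart_transfer() below for the real, batched usage.
 * Refilling is only safe once the pages handed out so far have actually
 * been transferred to the guest, which is why xennetback_get_mcl_page()
 * fails instead of refilling on its own:
 *
 *	paddr_t newp_ma;
 *
 *	if (xennetback_get_mcl_page(&newp_ma) != 0)
 *		xennetback_get_new_mcl_pages();
 */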
/*
 * If we can't transfer the mbuf directly, we have to copy it to a page which
 * will be transferred to the remote domain. We use a pool_cache for this,
 * or the mbuf cluster pool cache if MCLBYTES == PAGE_SIZE.
 */
#if MCLBYTES != PAGE_SIZE
pool_cache_t xmit_pages_cache;
#endif
pool_cache_t xmit_pages_cachep;

/* arrays used in xennetback_ifstart(), too large to allocate on stack */
static mmu_update_t xstart_mmu[NB_XMIT_PAGES_BATCH];
static multicall_entry_t xstart_mcl[NB_XMIT_PAGES_BATCH + 1];
static gnttab_transfer_t xstart_gop_transfer[NB_XMIT_PAGES_BATCH];
static gnttab_copy_t xstart_gop_copy[NB_XMIT_PAGES_BATCH];
struct mbuf *mbufs_sent[NB_XMIT_PAGES_BATCH];
struct _pages_pool_free {
	vaddr_t va;
	paddr_t pa;
} pages_pool_free[NB_XMIT_PAGES_BATCH];


static inline void
xni_pkt_unmap(struct xni_pkt *pkt, vaddr_t pkt_va)
{
	xen_shm_unmap(pkt_va, 1, &pkt->pkt_handle);
	pool_put(&xni_pkt_pool, pkt);
}

void
xvifattach(int n)
{
	int i;
	struct pglist mlist;
	struct vm_page *pg;

	XENPRINTF(("xennetback_init\n"));

	/*
	 * Steal some non-managed pages from the VM system, to replace
	 * mbuf cluster or xmit_pages_pool pages given away to foreign
	 * domains.
	 */
	if (uvm_pglistalloc(PAGE_SIZE * NB_XMIT_PAGES_BATCH, 0, 0xffffffff,
	    0, 0, &mlist, NB_XMIT_PAGES_BATCH, 0) != 0)
		panic("xennetback_init: uvm_pglistalloc");
	for (i = 0, pg = mlist.tqh_first; pg != NULL;
	    pg = pg->pageq.queue.tqe_next, i++)
		mcl_pages[i] = xpmap_ptom(VM_PAGE_TO_PHYS(pg)) >> PAGE_SHIFT;
	if (i != NB_XMIT_PAGES_BATCH)
		panic("xennetback_init: %d mcl pages", i);
	mcl_pages_alloc = NB_XMIT_PAGES_BATCH - 1;

	/* initialise pools */
	pool_init(&xni_pkt_pool, sizeof(struct xni_pkt), 0, 0, 0,
	    "xnbpkt", NULL, IPL_VM);
#if MCLBYTES != PAGE_SIZE
	xmit_pages_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0, "xnbxm", NULL,
	    IPL_VM, NULL, NULL, NULL);
	xmit_pages_cachep = xmit_pages_cache;
#else
	xmit_pages_cachep = mcl_cache;
#endif

	SLIST_INIT(&xnetback_instances);
	xenbus_backend_register(&xvif_backend_driver);
}
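
/*
 * Backend creation hook, called by the xenbus code when a new "vif"
 * backend node appears in the xenstore: read the frontend domain id and
 * handle, create the xvif%d.%d pseudo-interface, advertise
 * feature-rx-copy and feature-rx-flip, and switch to InitWait.
 */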
static int
xennetback_xenbus_create(struct xenbus_device *xbusd)
{
	struct xnetback_instance *xneti;
	long domid, handle;
	struct ifnet *ifp;
	extern int ifqmaxlen; /* XXX */
	char *val, *e, *p;
	int i, err;
	struct xenbus_transaction *xbt;

	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "frontend-id", &domid, 10)) != 0) {
		aprint_error("xvif: can't read %s/frontend-id: %d\n",
		    xbusd->xbusd_path, err);
		return err;
	}
	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "handle", &handle, 10)) != 0) {
		aprint_error("xvif: can't read %s/handle: %d\n",
		    xbusd->xbusd_path, err);
		return err;
	}

	if (xnetif_lookup(domid, handle) != NULL) {
		return EEXIST;
	}
	xneti = malloc(sizeof(struct xnetback_instance), M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (xneti == NULL) {
		return ENOMEM;
	}
	xneti->xni_domid = domid;
	xneti->xni_handle = handle;
	xneti->xni_status = DISCONNECTED;

	xbusd->xbusd_u.b.b_cookie = xneti;
	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
	xneti->xni_xbusd = xbusd;

	ifp = &xneti->xni_if;
	ifp->if_softc = xneti;
	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%d.%d",
	    (int)domid, (int)handle);

	/* read mac address */
	if ((err = xenbus_read(NULL, xbusd->xbusd_path, "mac", NULL, &val))) {
		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
		    xbusd->xbusd_path, err);
		goto fail;
	}
	for (i = 0, p = val; i < 6; i++) {
		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
		/* bytes 0-4 must end with ':', the last one with '\0' */
		if ((i < 5 && e[0] != ':') || (i == 5 && e[0] != '\0')) {
			aprint_error_ifnet(ifp,
			    "%s is not a valid mac address\n", val);
			err = EINVAL;
			goto fail;
		}
		p = &e[1];
	}
	free(val, M_DEVBUF);

	/* we can't use the same MAC addr as our guest */
	xneti->xni_enaddr[3]++;
	/* create pseudo-interface */
	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
	    ether_sprintf(xneti->xni_enaddr));
	ifp->if_flags =
	    IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
	ifp->if_snd.ifq_maxlen =
	    max(ifqmaxlen, NET_TX_RING_SIZE * 2);
	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
	ifp->if_ioctl = xennetback_ifioctl;
	ifp->if_start = xennetback_ifstart;
	ifp->if_watchdog = xennetback_ifwatchdog;
	ifp->if_init = xennetback_ifinit;
	ifp->if_stop = xennetback_ifstop;
	ifp->if_timer = 0;
	IFQ_SET_READY(&ifp->if_snd);
	if_attach(ifp);
	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);

	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);

	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;

	do {
		xbt = xenbus_transaction_start();
		if (xbt == NULL) {
			aprint_error_ifnet(ifp,
			    "%s: can't start transaction\n",
			    xbusd->xbusd_path);
			goto fail;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "vifname", "%s", ifp->if_xname);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/vifname: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-rx-copy", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-rx-copy: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-rx-flip", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-rx-flip: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
	if (err) {
		aprint_error_ifnet(ifp,
		    "%s: can't end transaction: %d\n",
		    xbusd->xbusd_path, err);
	}

	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
	if (err) {
		aprint_error_ifnet(ifp,
		    "failed to switch state on %s: %d\n",
		    xbusd->xbusd_path, err);
		goto fail;
	}
	return 0;
abort_xbt:
	xenbus_transaction_end(xbt, 1);
fail:
	free(xneti, M_DEVBUF);
	return err;
}
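
/*
 * Detach hook, called through xbusd_u.b.b_detach: remove the event
 * handler and the soft interrupt, detach the pseudo-interface, unmap
 * both shared rings and release the instance.
 */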
int
xennetback_xenbus_destroy(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct gnttab_unmap_grant_ref op;
	int err;

#if 0
	if (xneti->xni_status == CONNECTED) {
		return EBUSY;
	}
#endif
	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");
	hypervisor_mask_event(xneti->xni_evtchn);
	event_remove_handler(xneti->xni_evtchn, xennetback_evthandler, xneti);
	if (xneti->xni_softintr) {
		softint_disestablish(xneti->xni_softintr);
		xneti->xni_softintr = NULL;
	}

	SLIST_REMOVE(&xnetback_instances,
	    xneti, xnetback_instance, next);

	ether_ifdetach(&xneti->xni_if);
	if_detach(&xneti->xni_if);

	if (xneti->xni_txring.sring) {
		op.host_addr = xneti->xni_tx_ring_va;
		op.handle = xneti->xni_tx_ring_handle;
		op.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &op, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}
	if (xneti->xni_rxring.sring) {
		op.host_addr = xneti->xni_rx_ring_va;
		op.handle = xneti->xni_rx_ring_handle;
		op.dev_bus_addr = 0;
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    &op, 1);
		if (err)
			aprint_error_ifnet(&xneti->xni_if,
			    "unmap_grant_ref failed: %d\n", err);
	}
	/* the rings may never have been mapped; only free mapped VA */
	if (xneti->xni_tx_ring_va != 0)
		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
	if (xneti->xni_rx_ring_va != 0)
		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
	free(xneti, M_DEVBUF);
	return 0;
}
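
/*
 * Frontend state-change handler. The interesting transition is to
 * Connected: read the ring references and event channel from the
 * frontend's xenstore directory, map both rings, bind the event
 * channel, and only then advertise our own Connected state.
 */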
static void
xennetback_frontend_changed(void *arg, XenbusState new_state)
{
	struct xnetback_instance *xneti = arg;
	struct xenbus_device *xbusd = xneti->xni_xbusd;
	int err;
	netif_tx_sring_t *tx_ring;
	netif_rx_sring_t *rx_ring;
	struct gnttab_map_grant_ref op;
	evtchn_op_t evop;
	u_long tx_ring_ref, rx_ring_ref;
	u_long revtchn, rx_copy;

	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
	switch (new_state) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
		break;

	case XenbusStateConnected:
		/* read communication information */
		err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
		    "tx-ring-ref", &tx_ring_ref, 10);
		if (err) {
			xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
			    xbusd->xbusd_otherend);
			break;
		}
		err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
		    "rx-ring-ref", &rx_ring_ref, 10);
		if (err) {
			xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
			    xbusd->xbusd_otherend);
			break;
		}
		err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
		    "event-channel", &revtchn, 10);
		if (err) {
			xenbus_dev_fatal(xbusd, err,
			    "reading %s/event-channel",
			    xbusd->xbusd_otherend);
			break;
		}
		err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
		    "request-rx-copy", &rx_copy, 10);
		if (err == ENOENT)
			rx_copy = 0;
		else if (err) {
			xenbus_dev_fatal(xbusd, err,
			    "reading %s/request-rx-copy",
			    xbusd->xbusd_otherend);
			break;
		}

		if (rx_copy)
			xneti->xni_softintr = softint_establish(SOFTINT_NET,
			    xennetback_ifsoftstart_copy, xneti);
		else
			xneti->xni_softintr = softint_establish(SOFTINT_NET,
			    xennetback_ifsoftstart_transfer, xneti);
		if (xneti->xni_softintr == NULL) {
			err = ENOMEM;
			xenbus_dev_fatal(xbusd, ENOMEM,
			    "can't allocate softint", xbusd->xbusd_otherend);
			break;
		}

		/* allocate VA space and map rings */
		xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
		    UVM_KMF_VAONLY);
		if (xneti->xni_tx_ring_va == 0) {
			xenbus_dev_fatal(xbusd, ENOMEM,
			    "can't get VA for tx ring", xbusd->xbusd_otherend);
			break;
		}
		tx_ring = (void *)xneti->xni_tx_ring_va;
		xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
		    UVM_KMF_VAONLY);
		if (xneti->xni_rx_ring_va == 0) {
			xenbus_dev_fatal(xbusd, ENOMEM,
			    "can't get VA for rx ring", xbusd->xbusd_otherend);
			goto err1;
		}
		rx_ring = (void *)xneti->xni_rx_ring_va;
		op.host_addr = xneti->xni_tx_ring_va;
		op.flags = GNTMAP_host_map;
		op.ref = tx_ring_ref;
		op.dom = xneti->xni_domid;
		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
		    &op, 1);
		if (err || op.status) {
			printf("%s: can't map TX grant ref: %d/%d\n",
			    xneti->xni_if.if_xname, err, op.status);
			goto err2;
		}
		xneti->xni_tx_ring_handle = op.handle;

		op.host_addr = xneti->xni_rx_ring_va;
		op.flags = GNTMAP_host_map;
		op.ref = rx_ring_ref;
		op.dom = xneti->xni_domid;
		err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
		    &op, 1);
		if (err || op.status) {
			printf("%s: can't map RX grant ref: %d/%d\n",
			    xneti->xni_if.if_xname, err, op.status);
			goto err2;
		}
		xneti->xni_rx_ring_handle = op.handle;
		BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
		BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
		evop.cmd = EVTCHNOP_bind_interdomain;
		evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
		evop.u.bind_interdomain.remote_port = revtchn;
		err = HYPERVISOR_event_channel_op(&evop);
		if (err) {
			printf("%s: can't get event channel: %d\n",
			    xneti->xni_if.if_xname, err);
			goto err2;
		}
		xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
		xen_wmb();
		xneti->xni_status = CONNECTED;
		xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
		xen_wmb();
		event_set_handler(xneti->xni_evtchn, xennetback_evthandler,
		    xneti, IPL_NET, xneti->xni_if.if_xname);
		xennetback_ifinit(&xneti->xni_if);
		hypervisor_enable_event(xneti->xni_evtchn);
		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
		break;

	case XenbusStateClosing:
		xneti->xni_status = DISCONNECTING;
		xneti->xni_if.if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
		xneti->xni_if.if_timer = 0;
		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		/* otherend_changed() should handle it for us */
		panic("xennetback_frontend_changed: closed");
	case XenbusStateUnknown:
	case XenbusStateInitWait:
	default:
		aprint_error("%s: invalid frontend state %d\n",
		    xneti->xni_if.if_xname, new_state);
		break;
	}
	return;
err2:
	uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
	    PAGE_SIZE, UVM_KMF_VAONLY);
err1:
	uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
	    PAGE_SIZE, UVM_KMF_VAONLY);
}

/* lookup a xneti based on domain id and interface handle */
static struct xnetback_instance *
xnetif_lookup(domid_t dom, uint32_t handle)
{
	struct xnetback_instance *xneti;

	SLIST_FOREACH(xneti, &xnetback_instances, next) {
		if (xneti->xni_domid == dom && xneti->xni_handle == handle)
			return xneti;
	}
	return NULL;
}


/* get a page to replace a mbuf cluster page given to a domain */
static int
xennetback_get_mcl_page(paddr_t *map)
{
	if (mcl_pages_alloc < 0) {
		/*
		 * We exhausted our allocation. We can't allocate new pages
		 * yet because the current ones may not have been loaned to
		 * the remote domain yet. We have to let the caller do this.
		 */
		return -1;
	}

	*map = ((paddr_t)mcl_pages[mcl_pages_alloc]) << PAGE_SHIFT;
	mcl_pages_alloc--;
	return 0;
}
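
/*
 * Refill mcl_pages[]: XENMEM_increase_reservation credits this domain
 * with fresh machine pages and writes their frame numbers into the
 * array handed in through the reservation structure. A short refill
 * (nb_pages < NB_XMIT_PAGES_BATCH) is not fatal; we simply run with a
 * smaller batch until the next refill.
 */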
static void
xennetback_get_new_mcl_pages(void)
{
	int nb_pages;
	struct xen_memory_reservation res;

	/* get some new pages */
	xenguest_handle(res.extent_start) = mcl_pages;
	res.nr_extents = NB_XMIT_PAGES_BATCH;
	res.extent_order = 0;
	res.address_bits = 0;
	res.domid = DOMID_SELF;

	nb_pages = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res);
	if (nb_pages <= 0) {
		printf("xennetback: can't get new mcl pages (%d)\n",
		    nb_pages);
		return;
	}
	if (nb_pages != NB_XMIT_PAGES_BATCH)
		printf("xennetback: got only %d new mcl pages\n", nb_pages);

	mcl_pages_alloc = nb_pages - 1;
}

static inline void
xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
{
	RING_IDX resp_prod;
	netif_tx_response_t *txresp;
	int do_event;

	resp_prod = xneti->xni_txring.rsp_prod_pvt;
	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);

	txresp->id = id;
	txresp->status = status;
	xneti->xni_txring.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
	if (do_event) {
		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
	}
}
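
/*
 * Event-channel interrupt handler: consume TX requests from the guest.
 * Each request describes a granted page; we map it, sanity-check the
 * packet, copy (or, on the disabled zero-copy path, wrap) the data into
 * an mbuf, hand it to the stack via if_input, and post a response. A
 * soft interrupt is scheduled at the end to look for packets we may now
 * be able to send back to the guest.
 */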
static int
xennetback_evthandler(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct ifnet *ifp = &xneti->xni_if;
	netif_tx_request_t *txreq;
	struct xni_pkt *pkt;
	vaddr_t pkt_va;
	struct mbuf *m;
	int receive_pending, err;
	RING_IDX req_cons;

	XENPRINTF(("xennetback_evthandler "));
	req_cons = xneti->xni_txring.req_cons;
	xen_rmb();
	while (1) {
		xen_rmb(); /* be sure to read the request before updating */
		xneti->xni_txring.req_cons = req_cons;
		xen_wmb();
		RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring,
		    receive_pending);
		if (receive_pending == 0)
			break;
		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
		xen_rmb();
		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
		    txreq->size));
		req_cons++;
		if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING))
		    != (IFF_UP | IFF_RUNNING))) {
			/* interface not up, drop */
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_DROPPED);
			continue;
		}
		/*
		 * Do some sanity checks, and map the packet's page.
		 */
		if (__predict_false(txreq->size < ETHER_HDR_LEN ||
		    txreq->size > (ETHER_MAX_LEN - ETHER_CRC_LEN))) {
			printf("%s: bad packet size %d\n",
			    ifp->if_xname, txreq->size);
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_ERROR);
			ifp->if_ierrors++;
			continue;
		}
		/* don't cross page boundaries */
		if (__predict_false(
		    txreq->offset + txreq->size > PAGE_SIZE)) {
			printf("%s: packet crosses page boundary\n",
			    ifp->if_xname);
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_ERROR);
			ifp->if_ierrors++;
			continue;
		}
		/* get a mbuf for this packet */
		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (__predict_false(m == NULL)) {
			static struct timeval lasttime;
			if (ratecheck(&lasttime, &xni_pool_errintvl))
				printf("%s: mbuf alloc failed\n",
				    ifp->if_xname);
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_DROPPED);
			ifp->if_ierrors++;
			continue;
		}

		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
		    xneti->xni_if.if_xname, txreq->offset,
		    txreq->size, txreq->id, MASK_NETIF_TX_IDX(req_cons)));

		pkt = pool_get(&xni_pkt_pool, PR_NOWAIT);
		if (__predict_false(pkt == NULL)) {
			static struct timeval lasttime;
			if (ratecheck(&lasttime, &xni_pool_errintvl))
				printf("%s: xnbpkt alloc failed\n",
				    ifp->if_xname);
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_DROPPED);
			ifp->if_ierrors++;
			m_freem(m);
			continue;
		}
		err = xen_shm_map(1, xneti->xni_domid, &txreq->gref, &pkt_va,
		    &pkt->pkt_handle, XSHM_RO);
		if (__predict_false(err == ENOMEM)) {
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_DROPPED);
			ifp->if_ierrors++;
			pool_put(&xni_pkt_pool, pkt);
			m_freem(m);
			continue;
		}

		if (__predict_false(err)) {
			printf("%s: mapping foreign page failed: %d\n",
			    xneti->xni_if.if_xname, err);
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_ERROR);
			ifp->if_ierrors++;
			pool_put(&xni_pkt_pool, pkt);
			m_freem(m);
			continue;
		}

		if ((ifp->if_flags & IFF_PROMISC) == 0) {
			struct ether_header *eh =
			    (void *)(pkt_va + txreq->offset);
			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
			    ETHER_ADDR_LEN) != 0) {
				xni_pkt_unmap(pkt, pkt_va);
				m_freem(m);
				xennetback_tx_response(xneti, txreq->id,
				    NETIF_RSP_OKAY);
				continue; /* packet is not for us */
			}
		}
#ifdef notyet
		/*
		 * A lot of work is needed in the tcp stack to handle
		 * read-only ext storage, so always copy for now.
		 */
		if (((req_cons + 1) & (NET_TX_RING_SIZE - 1)) ==
		    (xneti->xni_txring.rsp_prod_pvt & (NET_TX_RING_SIZE - 1)))
#else
		if (1)
#endif /* notyet */
		{
			/*
			 * This is the last TX buffer. Copy the data and
			 * ack it. Delaying it until the mbuf is
			 * freed will stall transmit.
			 */
			m->m_len = min(MHLEN, txreq->size);
			m->m_pkthdr.len = 0;
			m_copyback(m, 0, txreq->size,
			    (void *)(pkt_va + txreq->offset));
			xni_pkt_unmap(pkt, pkt_va);
			if (m->m_pkthdr.len < txreq->size) {
				ifp->if_ierrors++;
				m_freem(m);
				xennetback_tx_response(xneti, txreq->id,
				    NETIF_RSP_DROPPED);
				continue;
			}
			xennetback_tx_response(xneti, txreq->id,
			    NETIF_RSP_OKAY);
		} else {

			pkt->pkt_id = txreq->id;
			pkt->pkt_xneti = xneti;

			MEXTADD(m, pkt_va + txreq->offset,
			    txreq->size, M_DEVBUF, xennetback_tx_free, pkt);
			m->m_pkthdr.len = m->m_len = txreq->size;
			m->m_flags |= M_EXT_ROMAP;
		}
		if ((txreq->flags & NETTXF_csum_blank) != 0) {
			xennet_checksum_fill(&m);
			if (m == NULL) {
				ifp->if_ierrors++;
				continue;
			}
		}
		m->m_pkthdr.rcvif = ifp;
		ifp->if_ipackets++;

		bpf_mtap(ifp, m);
		(*ifp->if_input)(ifp, m);
	}
	xen_rmb(); /* be sure to read the request before updating pointer */
	xneti->xni_txring.req_cons = req_cons;
	xen_wmb();
	/* check to see if we can transmit more packets */
	softint_schedule(xneti->xni_softintr);

	return 1;
}
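
/*
 * External-storage free callback for the zero-copy path: the TX
 * response for a loaned page is delayed until the network stack is done
 * with the mbuf, at which point we ack the request and unmap the
 * foreign page.
 */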
static void
xennetback_tx_free(struct mbuf *m, void *va, size_t size, void *arg)
{
	int s = splnet();
	struct xni_pkt *pkt = arg;
	struct xnetback_instance *xneti = pkt->pkt_xneti;

	XENPRINTF(("xennetback_tx_free\n"));

	xennetback_tx_response(xneti, pkt->pkt_id, NETIF_RSP_OKAY);

	xni_pkt_unmap(pkt, (vaddr_t)va & ~PAGE_MASK);

	if (m)
		pool_cache_put(mb_cache, m);
	splx(s);
}

static int
xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	//struct xnetback_instance *xneti = ifp->if_softc;
	//struct ifreq *ifr = (struct ifreq *)data;
	int s, error;

	s = splnet();
	error = ether_ioctl(ifp, cmd, data);
	if (error == ENETRESET)
		error = 0;
	splx(s);
	return error;
}
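
/*
 * if_start handler. Both the transfer and the copy variant of the soft
 * start routine below drain if_snd in batches of up to
 * NB_XMIT_PAGES_BATCH packets per hypercall.
 */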
static void
xennetback_ifstart(struct ifnet *ifp)
{
	struct xnetback_instance *xneti = ifp->if_softc;

	/*
	 * The Xen communication channel is much more efficient if we can
	 * schedule batches of packets for the domain. To achieve this, we
	 * schedule a soft interrupt, and just return. This way, the network
	 * stack will enqueue all pending mbufs in the interface's send queue
	 * before it is processed by the soft interrupt handler.
	 */
	softint_schedule(xneti->xni_softintr);
}

static void
xennetback_ifsoftstart_transfer(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct ifnet *ifp = &xneti->xni_if;
	struct mbuf *m;
	vaddr_t xmit_va;
	paddr_t xmit_pa;
	paddr_t xmit_ma;
	paddr_t newp_ma = 0; /* XXX gcc */
	int i, j, nppitems;
	mmu_update_t *mmup;
	multicall_entry_t *mclp;
	netif_rx_response_t *rxresp;
	RING_IDX req_prod, resp_prod;
	int do_event = 0;
	gnttab_transfer_t *gop;
	int id, offset;

	XENPRINTF(("xennetback_ifsoftstart_transfer "));
	int s = splnet();
	if (__predict_false(
	    (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) {
		splx(s);
		return;
	}

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		XENPRINTF(("pkt\n"));
		req_prod = xneti->xni_rxring.sring->req_prod;
		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
		xen_rmb();

		mmup = xstart_mmu;
		mclp = xstart_mcl;
		gop = xstart_gop_transfer;
		for (nppitems = 0, i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
			XENPRINTF(("have a packet\n"));
			IFQ_POLL(&ifp->if_snd, m);
			if (__predict_false(m == NULL))
				panic("xennetback_ifsoftstart_transfer: "
				    "IFQ_POLL");
			if (__predict_false(
			    req_prod == xneti->xni_rxring.req_cons ||
			    xneti->xni_rxring.req_cons - resp_prod ==
			    NET_RX_RING_SIZE)) {
				/* out of ring space */
				XENPRINTF(("xennetback_ifsoftstart_transfer: "
				    "ring full req_prod 0x%x req_cons 0x%x "
				    "resp_prod 0x%x\n",
				    req_prod, xneti->xni_rxring.req_cons,
				    resp_prod));
				ifp->if_timer = 1;
				break;
			}
			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
				break; /* we filled the array */
			if (__predict_false(
			    xennetback_get_mcl_page(&newp_ma) != 0))
				break; /* out of memory */
			if ((m->m_flags & M_CLUSTER) != 0 &&
			    !M_READONLY(m) && MCLBYTES == PAGE_SIZE) {
				/* we can give this page away */
				xmit_pa = m->m_ext.ext_paddr;
				xmit_ma = xpmap_ptom(xmit_pa);
				xmit_va = (vaddr_t)m->m_ext.ext_buf;
				KASSERT(xmit_pa != M_PADDR_INVALID);
				KASSERT((xmit_va & PAGE_MASK) == 0);
				offset = m->m_data - m->m_ext.ext_buf;
			} else {
				/* we have to copy the packet */
				xmit_va = (vaddr_t)pool_cache_get_paddr(
				    xmit_pages_cachep,
				    PR_NOWAIT, &xmit_pa);
				if (__predict_false(xmit_va == 0))
					break; /* out of memory */

				KASSERT(xmit_pa != POOL_PADDR_INVALID);
				xmit_ma = xpmap_ptom(xmit_pa);
				XENPRINTF(("xennetback_get_xmit_page: got va "
				    "0x%x ma 0x%x\n", (u_int)xmit_va,
				    (u_int)xmit_ma));
				m_copydata(m, 0, m->m_pkthdr.len,
				    (char *)xmit_va + LINUX_REQUESTED_OFFSET);
				offset = LINUX_REQUESTED_OFFSET;
				pages_pool_free[nppitems].va = xmit_va;
				pages_pool_free[nppitems].pa = xmit_pa;
				nppitems++;
			}
			/* start filling ring */
			gop->ref = RING_GET_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons)->gref;
			id = RING_GET_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons)->id;
			xen_rmb();
			xneti->xni_rxring.req_cons++;
			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
			    resp_prod);
			rxresp->id = id;
			rxresp->offset = offset;
			rxresp->status = m->m_pkthdr.len;
			if ((m->m_pkthdr.csum_flags &
			    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
				rxresp->flags = NETRXF_csum_blank;
			} else {
				rxresp->flags = 0;
			}
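			/*
			 * The page flip proper happens in three steps,
			 * batched across the loop: re-point the P2M entry
			 * of xmit_pa at the replacement machine page,
			 * remap our VA onto it, and queue the old machine
			 * page for GNTTABOP_transfer to the guest.
			 */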
			/*
			 * Transfer the page containing the packet to the
			 * remote domain, and map newp in its place.
			 */
			xpmap_phys_to_machine_mapping[
			    (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT] =
			    newp_ma >> PAGE_SHIFT;
			MULTI_update_va_mapping(mclp, xmit_va,
			    newp_ma | PG_V | PG_RW | PG_U | PG_M, 0);
			mclp++;
			gop->mfn = xmit_ma >> PAGE_SHIFT;
			gop->domid = xneti->xni_domid;
			gop++;

			mmup->ptr = newp_ma | MMU_MACHPHYS_UPDATE;
			mmup->val = (xmit_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
			mmup++;

			/* done with this packet */
			IFQ_DEQUEUE(&ifp->if_snd, m);
			mbufs_sent[i] = m;
			resp_prod++;
			i++; /* this packet has been queued */
			ifp->if_opackets++;
			bpf_mtap(ifp, m);
		}
		if (i != 0) {
			/*
			 * We may have allocated buffers which have entries
			 * outstanding in the page update queue -- make sure
			 * we flush those first!
			 */
			int svm = splvm();
			xpq_flush_queue();
			splx(svm);
			mclp[-1].args[MULTI_UVMFLAGS_INDEX] =
			    UVMF_TLB_FLUSH|UVMF_ALL;
			mclp->op = __HYPERVISOR_mmu_update;
			mclp->args[0] = (unsigned long)xstart_mmu;
			mclp->args[1] = i;
			mclp->args[2] = 0;
			mclp->args[3] = DOMID_SELF;
			mclp++;
			/* update the MMU */
			if (HYPERVISOR_multicall(xstart_mcl, i + 1) != 0) {
				panic("%s: HYPERVISOR_multicall failed",
				    ifp->if_xname);
			}
			for (j = 0; j < i + 1; j++) {
				if (xstart_mcl[j].result != 0) {
					printf("%s: xstart_mcl[%d] "
					    "failed (%lu)\n", ifp->if_xname,
					    j, xstart_mcl[j].result);
					printf("%s: req_prod %u req_cons "
					    "%u rsp_prod %u rsp_prod_pvt %u "
					    "i %u\n",
					    ifp->if_xname,
					    xneti->xni_rxring.sring->req_prod,
					    xneti->xni_rxring.req_cons,
					    xneti->xni_rxring.sring->rsp_prod,
					    xneti->xni_rxring.rsp_prod_pvt,
					    i);
				}
			}
			if (HYPERVISOR_grant_table_op(GNTTABOP_transfer,
			    xstart_gop_transfer, i) != 0) {
				panic("%s: GNTTABOP_transfer failed",
				    ifp->if_xname);
			}

			for (j = 0; j < i; j++) {
				if (xstart_gop_transfer[j].status !=
				    GNTST_okay) {
					printf("%s GNTTABOP_transfer[%d] %d\n",
					    ifp->if_xname,
					    j, xstart_gop_transfer[j].status);
					printf("%s: req_prod %u req_cons "
					    "%u rsp_prod %u rsp_prod_pvt %u "
					    "i %d\n",
					    ifp->if_xname,
					    xneti->xni_rxring.sring->req_prod,
					    xneti->xni_rxring.req_cons,
					    xneti->xni_rxring.sring->rsp_prod,
					    xneti->xni_rxring.rsp_prod_pvt,
					    i);
					rxresp = RING_GET_RESPONSE(
					    &xneti->xni_rxring,
					    xneti->xni_rxring.rsp_prod_pvt + j);
					rxresp->status = NETIF_RSP_ERROR;
				}
			}

			/* update pointer */
			KASSERT(
			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
			    &xneti->xni_rxring, j);
			if (j)
				do_event = 1;
			/* now we can free the mbufs */
			for (j = 0; j < i; j++) {
				m_freem(mbufs_sent[j]);
			}
			for (j = 0; j < nppitems; j++) {
				pool_cache_put_paddr(xmit_pages_cachep,
				    (void *)pages_pool_free[j].va,
				    pages_pool_free[j].pa);
			}
		}
		/* send event */
		if (do_event) {
			xen_rmb();
			XENPRINTF(("%s receive event\n",
			    xneti->xni_if.if_xname));
			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
			do_event = 0;
		}
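		/*
		 * By now every page taken from mcl_pages[] in this round
		 * has been transferred, so it is safe to ask the hypervisor
		 * for replacements.
		 */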
		/* check if we need to get back some pages */
		if (mcl_pages_alloc < 0) {
			xennetback_get_new_mcl_pages();
			if (mcl_pages_alloc < 0) {
				/*
				 * setup the watchdog to try again, because
				 * xennetback_ifstart() will never be called
				 * again if the queue is full.
				 */
				printf("xennetback_ifsoftstart_transfer: "
				    "no mcl_pages\n");
				ifp->if_timer = 1;
				break;
			}
		}
		/*
		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
		 * here, as the frontend doesn't notify when adding
		 * requests anyway
		 */
		if (__predict_false(
		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
			/* ring full */
			break;
		}
	}
	splx(s);
}

static void
xennetback_ifsoftstart_copy(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct ifnet *ifp = &xneti->xni_if;
	struct mbuf *m, *new_m;
	paddr_t xmit_pa;
	paddr_t xmit_ma;
	int i, j;
	netif_rx_response_t *rxresp;
	RING_IDX req_prod, resp_prod;
	int do_event = 0;
	gnttab_copy_t *gop;
	int id, offset;

	XENPRINTF(("xennetback_ifsoftstart_copy "));
	int s = splnet();
	if (__predict_false(
	    (ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)) {
		splx(s);
		return;
	}

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		XENPRINTF(("pkt\n"));
		req_prod = xneti->xni_rxring.sring->req_prod;
		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
		xen_rmb();

		gop = xstart_gop_copy;
		for (i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
			XENPRINTF(("have a packet\n"));
			IFQ_POLL(&ifp->if_snd, m);
			if (__predict_false(m == NULL))
				panic("xennetback_ifsoftstart_copy: "
				    "IFQ_POLL");
			if (__predict_false(
			    req_prod == xneti->xni_rxring.req_cons ||
			    xneti->xni_rxring.req_cons - resp_prod ==
			    NET_RX_RING_SIZE)) {
				/* out of ring space */
				XENPRINTF(("xennetback_ifsoftstart_copy: "
				    "ring full req_prod 0x%x req_cons 0x%x "
				    "resp_prod 0x%x\n",
				    req_prod, xneti->xni_rxring.req_cons,
				    resp_prod));
				ifp->if_timer = 1;
				break;
			}
			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
				break; /* we filled the array */
			switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
			case M_EXT|M_EXT_CLUSTER:
				KASSERT(m->m_ext.ext_paddr !=
				    M_PADDR_INVALID);
				xmit_pa = m->m_ext.ext_paddr;
				offset = m->m_data - m->m_ext.ext_buf;
				break;
			case 0:
				KASSERT(m->m_paddr != M_PADDR_INVALID);
				xmit_pa = m->m_paddr;
				offset = M_BUFOFFSET(m) +
				    (m->m_data - M_BUFADDR(m));
				break;
			default:
				if (__predict_false(
				    !pmap_extract(pmap_kernel(),
				    (vaddr_t)m->m_data, &xmit_pa))) {
					panic("xennetback_ifsoftstart_copy: "
					    "no pa");
				}
				offset = 0;
				break;
			}
			offset += (xmit_pa & ~PG_FRAME);
			xmit_pa = (xmit_pa & PG_FRAME);
			if (m->m_pkthdr.len != m->m_len ||
			    (offset + m->m_pkthdr.len) > PAGE_SIZE) {
				MGETHDR(new_m, M_DONTWAIT, MT_DATA);
				if (__predict_false(new_m == NULL)) {
					printf("%s: cannot allocate new "
					    "mbuf\n", ifp->if_xname);
					break;
				}
				if (m->m_pkthdr.len > MHLEN) {
					MCLGET(new_m, M_DONTWAIT);
					if (__predict_false(
					    (new_m->m_flags & M_EXT) == 0)) {
						XENPRINTF((
						    "%s: no mbuf cluster\n",
						    ifp->if_xname));
						m_freem(new_m);
						break;
					}
					xmit_pa = new_m->m_ext.ext_paddr;
					offset = new_m->m_data -
					    new_m->m_ext.ext_buf;
				} else {
					xmit_pa = new_m->m_paddr;
					offset = M_BUFOFFSET(new_m) +
					    (new_m->m_data - M_BUFADDR(new_m));
				}
				offset += (xmit_pa & ~PG_FRAME);
				xmit_pa = (xmit_pa & PG_FRAME);
				m_copydata(m, 0, m->m_pkthdr.len,
				    mtod(new_m, void *));
				new_m->m_len = new_m->m_pkthdr.len =
				    m->m_pkthdr.len;
				IFQ_DEQUEUE(&ifp->if_snd, m);
				m_freem(m);
				m = new_m;
			} else {
				IFQ_DEQUEUE(&ifp->if_snd, m);
			}

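			/*
			 * m is now a single contiguous segment that fits in
			 * one page; xmit_pa, offset and m_pkthdr.len fully
			 * describe the source of the grant copy.
			 */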
			KASSERT(xmit_pa != POOL_PADDR_INVALID);
			KASSERT((offset + m->m_pkthdr.len) <= PAGE_SIZE);
			xmit_ma = xpmap_ptom(xmit_pa);
			/* start filling ring */
			gop->flags = GNTCOPY_dest_gref;
			gop->source.offset = offset;
			gop->source.domid = DOMID_SELF;
			gop->source.u.gmfn = xmit_ma >> PAGE_SHIFT;

			gop->dest.u.ref = RING_GET_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons)->gref;
			gop->dest.offset = 0;
			gop->dest.domid = xneti->xni_domid;

			gop->len = m->m_pkthdr.len;
			gop++;

			id = RING_GET_REQUEST(&xneti->xni_rxring,
			    xneti->xni_rxring.req_cons)->id;
			xen_rmb();
			xneti->xni_rxring.req_cons++;
			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
			    resp_prod);
			rxresp->id = id;
			rxresp->offset = 0;
			rxresp->status = m->m_pkthdr.len;
			if ((m->m_pkthdr.csum_flags &
			    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
				rxresp->flags = NETRXF_csum_blank;
			} else {
				rxresp->flags = 0;
			}

			mbufs_sent[i] = m;
			resp_prod++;
			i++; /* this packet has been queued */
			ifp->if_opackets++;
			bpf_mtap(ifp, m);
		}
		if (i != 0) {
			if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
			    xstart_gop_copy, i) != 0) {
				panic("%s: GNTTABOP_copy failed",
				    ifp->if_xname);
			}

			for (j = 0; j < i; j++) {
				if (xstart_gop_copy[j].status != GNTST_okay) {
					printf("%s GNTTABOP_copy[%d] %d\n",
					    ifp->if_xname,
					    j, xstart_gop_copy[j].status);
					printf("%s: req_prod %u req_cons "
					    "%u rsp_prod %u rsp_prod_pvt %u "
					    "i %d\n",
					    ifp->if_xname,
					    xneti->xni_rxring.sring->req_prod,
					    xneti->xni_rxring.req_cons,
					    xneti->xni_rxring.sring->rsp_prod,
					    xneti->xni_rxring.rsp_prod_pvt,
					    i);
					rxresp = RING_GET_RESPONSE(
					    &xneti->xni_rxring,
					    xneti->xni_rxring.rsp_prod_pvt + j);
					rxresp->status = NETIF_RSP_ERROR;
				}
			}

			/* update pointer */
			KASSERT(
			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
			    &xneti->xni_rxring, j);
			if (j)
				do_event = 1;
			/* now we can free the mbufs */
			for (j = 0; j < i; j++) {
				m_freem(mbufs_sent[j]);
			}
		}
		/* send event */
		if (do_event) {
			xen_rmb();
			XENPRINTF(("%s receive event\n",
			    xneti->xni_if.if_xname));
			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
			do_event = 0;
		}
		/*
		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
		 * here, as the frontend doesn't notify when adding
		 * requests anyway
		 */
		if (__predict_false(
		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
			/* ring full */
			break;
		}
	}
	splx(s);
}
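
/*
 * Note the contrast with xennetback_ifsoftstart_transfer() above: the
 * copy variant never gives pages away, it only asks the hypervisor to
 * copy each packet into a page the frontend granted us, so no page
 * accounting or MMU update is needed. Which variant runs is chosen by
 * the frontend's request-rx-copy node (see
 * xennetback_frontend_changed()).
 */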

static void
xennetback_ifwatchdog(struct ifnet *ifp)
{
	/*
	 * We can get to the following condition: transmit stalls because
	 * the ring is full when the ifq is full too. In this case (as,
	 * unfortunately, we don't get an interrupt from xen on transmit)
	 * nothing will ever call xennetback_ifstart() again. Here we abuse
	 * the watchdog to get out of this condition.
	 */
	XENPRINTF(("xennetback_ifwatchdog\n"));
	xennetback_ifstart(ifp);
}


static int
xennetback_ifinit(struct ifnet *ifp)
{
	struct xnetback_instance *xneti = ifp->if_softc;
	int s = splnet();

	if ((ifp->if_flags & IFF_UP) == 0) {
		splx(s);
		return 0;
	}
	if (xneti->xni_status == CONNECTED)
		ifp->if_flags |= IFF_RUNNING;
	splx(s);
	return 0;
}

static void
xennetback_ifstop(struct ifnet *ifp, int disable)
{
	struct xnetback_instance *xneti = ifp->if_softc;
	int s = splnet();

	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
	ifp->if_timer = 0;
	if (xneti->xni_status == CONNECTED) {
		XENPRINTF(("%s: req_prod 0x%x resp_prod 0x%x req_cons 0x%x "
		    "event 0x%x\n", ifp->if_xname,
		    xneti->xni_txring.sring->req_prod,
		    xneti->xni_txring.sring->rsp_prod,
		    xneti->xni_txring.req_cons,
		    xneti->xni_txring.sring->rsp_event));
		/* flush pending TX requests from the frontend */
		xennetback_evthandler(ifp->if_softc);
	}
	splx(s);
}