1 /*      $NetBSD: xennetback_xenbus.c,v 1.93 2020/04/06 19:52:38 jdolecek Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.93 2020/04/06 19:52:38 jdolecek Exp $");
29 
30 #include "opt_xen.h"
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/kmem.h>
37 #include <sys/queue.h>
38 #include <sys/kernel.h>
39 #include <sys/mbuf.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/ioctl.h>
43 #include <sys/errno.h>
44 #include <sys/device.h>
45 #include <sys/intr.h>
46 
47 #include <net/if.h>
48 #include <net/if_types.h>
49 #include <net/if_dl.h>
50 #include <net/route.h>
51 #include <net/netisr.h>
52 #include <net/bpf.h>
53 
54 #include <net/if_ether.h>
55 
56 #include <xen/xen.h>
57 #include <xen/xen_shm.h>
58 #include <xen/evtchn.h>
59 #include <xen/xenbus.h>
60 #include <xen/xennet_checksum.h>
61 
62 #include <uvm/uvm.h>
63 
64 /*
65  * Backend network device driver for Xen.
66  */
67 
68 #ifdef XENDEBUG_NET
69 #define XENPRINTF(x) printf x
70 #else
71 #define XENPRINTF(x)
72 #endif
73 
74 extern pt_entry_t xpmap_pg_nx;
75 
76 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
77 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
78 
79 /* linux wants at least 16 bytes free in front of the packet */
80 #define LINUX_REQUESTED_OFFSET 16
81 
82 /* ratecheck(9) for pool allocation failures */
83 static const struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */
84 
85 /* state of a xnetback instance */
86 typedef enum {
87 	CONNECTED,
88 	DISCONNECTING,
89 	DISCONNECTED
90 } xnetback_state_t;
91 
92 /* we keep the xnetback instances in a linked list */
93 struct xnetback_instance {
94 	SLIST_ENTRY(xnetback_instance) next;
95 	struct xenbus_device *xni_xbusd; /* our xenstore entry */
96 	domid_t xni_domid;		/* attached to this domain */
97 	uint32_t xni_handle;	/* domain-specific handle */
98 	xnetback_state_t xni_status;
99 
100 	/* network interface stuff */
101 	struct ethercom xni_ec;
102 	struct callout xni_restart;
103 	uint8_t xni_enaddr[ETHER_ADDR_LEN];
104 
105 	/* remote domain communication stuff */
106 	unsigned int xni_evtchn; /* our event channel */
107 	struct intrhand *xni_ih;
108 	netif_tx_back_ring_t xni_txring;
109 	netif_rx_back_ring_t xni_rxring;
110 	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
111 	grant_handle_t xni_rx_ring_handle;
112 	vaddr_t xni_tx_ring_va; /* to unmap the ring */
113 	vaddr_t xni_rx_ring_va;
114 };
115 #define xni_if    xni_ec.ec_if
116 #define xni_bpf   xni_if.if_bpf
117 
118        void xvifattach(int);
119 static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
120 static void xennetback_ifstart(struct ifnet *);
121 static void xennetback_ifsoftstart_copy(struct xnetback_instance *);
122 static void xennetback_ifwatchdog(struct ifnet *);
123 static int  xennetback_ifinit(struct ifnet *);
124 static void xennetback_ifstop(struct ifnet *, int);
125 
126 static int  xennetback_xenbus_create(struct xenbus_device *);
127 static int  xennetback_xenbus_destroy(void *);
128 static void xennetback_frontend_changed(void *, XenbusState);
129 
130 static inline void xennetback_tx_response(struct xnetback_instance *,
131     int, int);
132 static void xennetback_mbuf_addr(struct mbuf *, paddr_t *, int *);
133 
134 static SLIST_HEAD(, xnetback_instance) xnetback_instances;
135 static kmutex_t xnetback_lock;
136 
137 static bool xnetif_lookup(domid_t, uint32_t);
138 static int  xennetback_evthandler(void *);
139 
140 static struct xenbus_backend_driver xvif_backend_driver = {
141 	.xbakd_create = xennetback_xenbus_create,
142 	.xbakd_type = "vif"
143 };
144 
145 /*
146  * Number of packets to transmit in one hypercall (= number of pages to
147  * transmit at once).
148  */
149 #define NB_XMIT_PAGES_BATCH 64
150 
151 /* arrays used in the TX/RX copy handlers, too large to allocate on stack */
152 /* XXXSMP */
153 static gnttab_copy_t     xstart_gop_copy[NB_XMIT_PAGES_BATCH];
154 static struct mbuf *mbufs_sent[NB_XMIT_PAGES_BATCH];
155 static struct _req_info {
156 	int id;
157 	int flags;
158 } xstart_req[NB_XMIT_PAGES_BATCH];
159 
160 
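/*
 * Pseudo-device attach entry point: set up the list of backend instances
 * and register the "vif" backend driver with xenbus.
 */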
161 void
162 xvifattach(int n)
163 {
164 	XENPRINTF(("xennetback_init\n"));
165 
166 	SLIST_INIT(&xnetback_instances);
167 	mutex_init(&xnetback_lock, MUTEX_DEFAULT, IPL_NONE);
168 
169 	xenbus_backend_register(&xvif_backend_driver);
170 }
171 
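/*
 * Backend creation hook, called when xenstore announces a new "vif"
 * backend.  Read the frontend domain id, handle and MAC address, create
 * and attach the xvif pseudo-interface, advertise our features
 * (rx-copy, IPv6 checksum offload) in xenstore and switch the device
 * to InitWait.
 */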
172 static int
173 xennetback_xenbus_create(struct xenbus_device *xbusd)
174 {
175 	struct xnetback_instance *xneti;
176 	long domid, handle;
177 	struct ifnet *ifp;
178 	extern int ifqmaxlen; /* XXX */
179 	char *val, *e, *p;
180 	int i, err;
181 	struct xenbus_transaction *xbt;
182 
183 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
184 	    "frontend-id", &domid, 10)) != 0) {
185 		aprint_error("xvif: can't read %s/frontend-id: %d\n",
186 		    xbusd->xbusd_path, err);
187 		return err;
188 	}
189 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
190 	    "handle", &handle, 10)) != 0) {
191 		aprint_error("xvif: can't read %s/handle: %d\n",
192 		    xbusd->xbusd_path, err);
193 		return err;
194 	}
195 
196 	if (xnetif_lookup(domid, handle)) {
197 		return EEXIST;
198 	}
199 	xneti = kmem_zalloc(sizeof(*xneti), KM_SLEEP);
200 	xneti->xni_domid = domid;
201 	xneti->xni_handle = handle;
202 	xneti->xni_status = DISCONNECTED;
203 
204 	xbusd->xbusd_u.b.b_cookie = xneti;
205 	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
206 	xneti->xni_xbusd = xbusd;
207 
208 	ifp = &xneti->xni_if;
209 	ifp->if_softc = xneti;
210 	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
211 	    (int)domid, (int)handle);
212 
213 	/* read mac address */
214 	if ((err = xenbus_read(NULL, xbusd->xbusd_path, "mac", NULL, &val))) {
215 		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
216 		    xbusd->xbusd_path, err);
217 		goto fail;
218 	}
219 	for (i = 0, p = val; i < 6; i++) {
220 		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
221 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
222 			aprint_error_ifnet(ifp,
223 			    "%s is not a valid mac address\n", val);
224 			free(val, M_DEVBUF);
225 			err = EINVAL;
226 			goto fail;
227 		}
228 		p = &e[1];
229 	}
230 	free(val, M_DEVBUF);
231 
232 	/* we can't use the same MAC addr as our guest */
233 	xneti->xni_enaddr[3]++;
234 	/* create pseudo-interface */
235 	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
236 	    ether_sprintf(xneti->xni_enaddr));
237 	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
238 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
239 	ifp->if_snd.ifq_maxlen =
240 	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
241 	ifp->if_capabilities =
242 		IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx
243 		| IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
244 		| IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
245 		| IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx
246 		| IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx;
247 #define XN_M_CSUM_SUPPORTED	(				\
248 		M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_IPv4	\
249 		| M_CSUM_TCPv6 | M_CSUM_UDPv6			\
250 	)
251 	ifp->if_ioctl = xennetback_ifioctl;
252 	ifp->if_start = xennetback_ifstart;
253 	ifp->if_watchdog = xennetback_ifwatchdog;
254 	ifp->if_init = xennetback_ifinit;
255 	ifp->if_stop = xennetback_ifstop;
256 	ifp->if_timer = 0;
257 	IFQ_SET_READY(&ifp->if_snd);
258 	if_attach(ifp);
259 	if_deferred_start_init(ifp, NULL);
260 	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);
261 
262 	mutex_enter(&xnetback_lock);
263 	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
264 	mutex_exit(&xnetback_lock);
265 
266 	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;
267 
268 	do {
269 		xbt = xenbus_transaction_start();
270 		if (xbt == NULL) {
271 			aprint_error_ifnet(ifp,
272 			    "%s: can't start transaction\n",
273 			    xbusd->xbusd_path);
274 			goto fail;
275 		}
276 		err = xenbus_printf(xbt, xbusd->xbusd_path,
277 		    "vifname", "%s", ifp->if_xname);
278 		if (err) {
279 			aprint_error_ifnet(ifp,
280 			    "failed to write %s/vifname: %d\n",
281 			    xbusd->xbusd_path, err);
282 			goto abort_xbt;
283 		}
284 		err = xenbus_printf(xbt, xbusd->xbusd_path,
285 		    "feature-rx-copy", "%d", 1);
286 		if (err) {
287 			aprint_error_ifnet(ifp,
288 			    "failed to write %s/feature-rx-copy: %d\n",
289 			    xbusd->xbusd_path, err);
290 			goto abort_xbt;
291 		}
292 		err = xenbus_printf(xbt, xbusd->xbusd_path,
293 		    "feature-ipv6-csum-offload", "%d", 1);
294 		if (err) {
295 			aprint_error_ifnet(ifp,
296 			    "failed to write %s/feature-ipv6-csum-offload: %d\n",
297 			    xbusd->xbusd_path, err);
298 			goto abort_xbt;
299 		}
300 	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
301 	if (err) {
302 		aprint_error_ifnet(ifp,
303 		    "%s: can't end transaction: %d\n",
304 		    xbusd->xbusd_path, err);
305 	}
306 
307 	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
308 	if (err) {
309 		aprint_error_ifnet(ifp,
310 		    "failed to switch state on %s: %d\n",
311 		    xbusd->xbusd_path, err);
312 		goto fail;
313 	}
314 	return 0;
315 
316 abort_xbt:
317 	xenbus_transaction_end(xbt, 1);
318 fail:
319 	kmem_free(xneti, sizeof(*xneti));
320 	return err;
321 }
322 
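/*
 * Backend detach hook: tear down the event channel interrupt, remove the
 * instance from the list, detach the pseudo-interface and unmap/free the
 * shared ring pages.
 */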
323 int
324 xennetback_xenbus_destroy(void *arg)
325 {
326 	struct xnetback_instance *xneti = arg;
327 	struct gnttab_unmap_grant_ref op;
328 	int err;
329 
330 	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");
331 
332 	if (xneti->xni_ih != NULL) {
333 		hypervisor_mask_event(xneti->xni_evtchn);
334 		xen_intr_disestablish(xneti->xni_ih);
335 		xneti->xni_ih = NULL;
336 	}
337 
338 	mutex_enter(&xnetback_lock);
339 	SLIST_REMOVE(&xnetback_instances,
340 	    xneti, xnetback_instance, next);
341 	mutex_exit(&xnetback_lock);
342 
343 	ether_ifdetach(&xneti->xni_if);
344 	if_detach(&xneti->xni_if);
345 
346 	if (xneti->xni_txring.sring) {
347 		op.host_addr = xneti->xni_tx_ring_va;
348 		op.handle = xneti->xni_tx_ring_handle;
349 		op.dev_bus_addr = 0;
350 		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
351 		    &op, 1);
352 		if (err)
353 			aprint_error_ifnet(&xneti->xni_if,
354 					"unmap_grant_ref failed: %d\n", err);
355 	}
356 	if (xneti->xni_rxring.sring) {
357 		op.host_addr = xneti->xni_rx_ring_va;
358 		op.handle = xneti->xni_rx_ring_handle;
359 		op.dev_bus_addr = 0;
360 		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
361 		    &op, 1);
362 		if (err)
363 			aprint_error_ifnet(&xneti->xni_if,
364 					"unmap_grant_ref failed: %d\n", err);
365 	}
366 	if (xneti->xni_tx_ring_va != 0) {
367 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
368 		    PAGE_SIZE, UVM_KMF_VAONLY);
369 		xneti->xni_tx_ring_va = 0;
370 	}
371 	if (xneti->xni_rx_ring_va != 0) {
372 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
373 		    PAGE_SIZE, UVM_KMF_VAONLY);
374 		xneti->xni_rx_ring_va = 0;
375 	}
376 	kmem_free(xneti, sizeof(*xneti));
377 	return 0;
378 }
379 
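/*
 * Called when the frontend reaches the Connected state: read the ring
 * references and event channel from xenstore, map the TX and RX shared
 * rings into kernel VA, bind the interdomain event channel and establish
 * the interrupt handler.  Returns 0 on success, -1 on failure (with the
 * rings unmapped and their VA freed).
 */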
380 static int
381 xennetback_connect(struct xnetback_instance *xneti)
382 {
383 	int err;
384 	netif_tx_sring_t *tx_ring;
385 	netif_rx_sring_t *rx_ring;
386 	struct gnttab_map_grant_ref op;
387 	struct gnttab_unmap_grant_ref uop;
388 	evtchn_op_t evop;
389 	u_long tx_ring_ref, rx_ring_ref;
390 	u_long revtchn, rx_copy;
391 	struct xenbus_device *xbusd = xneti->xni_xbusd;
392 
393 	/* read communication information */
394 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
395 	    "tx-ring-ref", &tx_ring_ref, 10);
396 	if (err) {
397 		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
398 		    xbusd->xbusd_otherend);
399 		return -1;
400 	}
401 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
402 	    "rx-ring-ref", &rx_ring_ref, 10);
403 	if (err) {
404 		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
405 		    xbusd->xbusd_otherend);
406 		return -1;
407 	}
408 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
409 	    "event-channel", &revtchn, 10);
410 	if (err) {
411 		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
412 		    xbusd->xbusd_otherend);
413 		return -1;
414 	}
415 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
416 	    "request-rx-copy", &rx_copy, 10);
417 	if (err == ENOENT || !rx_copy) {
418 		xenbus_dev_fatal(xbusd, err,
419 		    "%s/request-rx-copy not supported by frontend",
420 		    xbusd->xbusd_otherend);
421 		return -1;
422 	} else if (err) {
423 		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
424 		    xbusd->xbusd_otherend);
425 		return -1;
426 	}
427 
428 	/* allocate VA space and map rings */
429 	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
430 	    UVM_KMF_VAONLY);
431 	if (xneti->xni_tx_ring_va == 0) {
432 		xenbus_dev_fatal(xbusd, ENOMEM,
433 		    "can't get VA for TX ring", xbusd->xbusd_otherend);
434 		goto err1;
435 	}
436 	tx_ring = (void *)xneti->xni_tx_ring_va;
437 
438 	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
439 	    UVM_KMF_VAONLY);
440 	if (xneti->xni_rx_ring_va == 0) {
441 		xenbus_dev_fatal(xbusd, ENOMEM,
442 		    "can't get VA for RX ring", xbusd->xbusd_otherend);
443 		goto err1;
444 	}
445 	rx_ring = (void *)xneti->xni_rx_ring_va;
446 
447 	op.host_addr = xneti->xni_tx_ring_va;
448 	op.flags = GNTMAP_host_map;
449 	op.ref = tx_ring_ref;
450 	op.dom = xneti->xni_domid;
451 	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
452 	if (err || op.status) {
453 		aprint_error_ifnet(&xneti->xni_if,
454 		    "can't map TX grant ref: err %d status %d\n",
455 		    err, op.status);
456 		goto err2;
457 	}
458 	xneti->xni_tx_ring_handle = op.handle;
459 	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
460 
461 	op.host_addr = xneti->xni_rx_ring_va;
462 	op.flags = GNTMAP_host_map;
463 	op.ref = rx_ring_ref;
464 	op.dom = xneti->xni_domid;
465 	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
466 	if (err || op.status) {
467 		aprint_error_ifnet(&xneti->xni_if,
468 		    "can't map RX grant ref: err %d status %d\n",
469 		    err, op.status);
470 		goto err2;
471 	}
472 	xneti->xni_rx_ring_handle = op.handle;
473 	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
474 
475 	evop.cmd = EVTCHNOP_bind_interdomain;
476 	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
477 	evop.u.bind_interdomain.remote_port = revtchn;
478 	err = HYPERVISOR_event_channel_op(&evop);
479 	if (err) {
480 		aprint_error_ifnet(&xneti->xni_if,
481 		    "can't get event channel: %d\n", err);
482 		goto err2;
483 	}
484 	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
485 	xen_wmb();
486 	xneti->xni_status = CONNECTED;
487 	xen_wmb();
488 
489 	xneti->xni_ih = xen_intr_establish_xname(-1, &xen_pic, xneti->xni_evtchn,
490 	    IST_LEVEL, IPL_NET, xennetback_evthandler, xneti, false,
491 	    xneti->xni_if.if_xname);
492 	KASSERT(xneti->xni_ih != NULL);
493 	xennetback_ifinit(&xneti->xni_if);
494 	hypervisor_unmask_event(xneti->xni_evtchn);
495 	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
496 	return 0;
497 
498 err2:
499 	/* unmap rings */
500 	if (xneti->xni_tx_ring_handle != 0) {
501 		uop.host_addr = xneti->xni_tx_ring_va;
502 		uop.handle = xneti->xni_tx_ring_handle;
503 		uop.dev_bus_addr = 0;
504 		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
505 		    &uop, 1);
506 		if (err)
507 			aprint_error_ifnet(&xneti->xni_if,
508 			    "unmap_grant_ref failed: %d\n", err);
509 	}
510 
511 	if (xneti->xni_rx_ring_handle != 0) {
512 		uop.host_addr = xneti->xni_rx_ring_va;
513 		uop.handle = xneti->xni_rx_ring_handle;
514 		uop.dev_bus_addr = 0;
515 		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
516 		    &uop, 1);
517 		if (err)
518 			aprint_error_ifnet(&xneti->xni_if,
519 			    "unmap_grant_ref failed: %d\n", err);
520 	}
521 
522 err1:
523 	/* free rings VA space */
524 	if (xneti->xni_rx_ring_va != 0)
525 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
526 		    PAGE_SIZE, UVM_KMF_VAONLY);
527 
528 	if (xneti->xni_tx_ring_va != 0)
529 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
530 		    PAGE_SIZE, UVM_KMF_VAONLY);
531 
532 	return -1;
533 
534 }
535 
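/* Handle a state change of the frontend device reported via xenstore. */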
536 static void
537 xennetback_frontend_changed(void *arg, XenbusState new_state)
538 {
539 	struct xnetback_instance *xneti = arg;
540 	struct xenbus_device *xbusd = xneti->xni_xbusd;
541 
542 	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
543 	switch(new_state) {
544 	case XenbusStateInitialising:
545 	case XenbusStateInitialised:
546 		break;
547 
548 	case XenbusStateConnected:
549 		if (xneti->xni_status == CONNECTED)
550 			break;
551 		if (xennetback_connect(xneti) == 0)
552 			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
553 		break;
554 
555 	case XenbusStateClosing:
556 		xneti->xni_status = DISCONNECTING;
557 		xneti->xni_if.if_flags &= ~IFF_RUNNING;
558 		xneti->xni_if.if_timer = 0;
559 		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
560 		break;
561 
562 	case XenbusStateClosed:
563 		/* otherend_changed() should handle it for us */
564 		panic("xennetback_frontend_changed: closed\n");
565 	case XenbusStateUnknown:
566 	case XenbusStateInitWait:
567 	default:
568 		aprint_error("%s: invalid frontend state %d\n",
569 		    xneti->xni_if.if_xname, new_state);
570 		break;
571 	}
572 	return;
573 
574 }
575 
576 /* lookup a xneti based on domain id and interface handle */
577 static bool
578 xnetif_lookup(domid_t dom, uint32_t handle)
579 {
580 	struct xnetback_instance *xneti;
581 	bool found = false;
582 
583 	mutex_enter(&xnetback_lock);
584 	SLIST_FOREACH(xneti, &xnetback_instances, next) {
585 		if (xneti->xni_domid == dom && xneti->xni_handle == handle) {
586 			found = true;
587 			break;
588 		}
589 	}
590 	mutex_exit(&xnetback_lock);
591 
592 	return found;
593 }
594 
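/*
 * Queue a response for the given TX request id on the TX ring and push
 * it, notifying the frontend through the event channel if it asked to be
 * notified.
 */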
595 static inline void
596 xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
597 {
598 	RING_IDX resp_prod;
599 	netif_tx_response_t *txresp;
600 	int do_event;
601 
602 	resp_prod = xneti->xni_txring.rsp_prod_pvt;
603 	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);
604 
605 	txresp->id = id;
606 	txresp->status = status;
607 	xneti->xni_txring.rsp_prod_pvt++;
608 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
609 	if (do_event) {
610 		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
611 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
612 	}
613 }
614 
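/*
 * Sanity-check a TX request from the frontend.  Returns NULL if the
 * request is acceptable, or a string describing why it was rejected.
 */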
615 static inline const char *
616 xennetback_tx_check_packet(const netif_tx_request_t *txreq, int vlan)
617 {
618 	if (__predict_false(txreq->size < ETHER_HDR_LEN))
619 		return "too small";
620 
621 	if (__predict_false(txreq->offset + txreq->size > PAGE_SIZE))
622 		return "crossing a page boundary";
623 
624 	int maxlen = ETHER_MAX_LEN - ETHER_CRC_LEN;
625 	if (vlan)
626 		maxlen += ETHER_VLAN_ENCAP_LEN;
627 	if (__predict_false(txreq->size > maxlen))
628 		return "too big";
629 
630 	/* Somewhat redundant, since MCLBYTES > ETHER_MAX_LEN */
631 	if (__predict_false(txreq->size > MCLBYTES))
632 		return "bigger than MCLBYTES";
633 
634 	return NULL;
635 }
636 
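/*
 * Perform the queued grant copies for up to NB_XMIT_PAGES_BATCH TX
 * requests, acknowledge each request to the frontend and hand the
 * resulting mbufs to the network stack.  On a copy failure the remaining
 * packets are dropped and NETIF_RSP_ERROR is returned to the frontend.
 */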
637 static void
638 xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
639 	int queued)
640 {
641 	int i = 0;
642 	gnttab_copy_t *gop;
643 	struct mbuf *m;
644 	struct _req_info *req;
645 
646 	/*
647 	 * Copy the data and ack it. Delaying it until the mbuf is
648 	 * freed will stall transmit.
649 	 */
650 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xstart_gop_copy, queued)
651 	    != 0) {
652 		printf("%s: GNTTABOP_copy failed\n", ifp->if_xname);
653 		goto abort;
654 	}
655 
656 	for (; i < queued; i++) {
657 		gop = &xstart_gop_copy[i];
658 		m = mbufs_sent[i];
659 		req = &xstart_req[i];
660 
661 		if (gop->status != GNTST_okay) {
662 			printf("%s GNTTABOP_copy[%d] %d\n",
663 			    ifp->if_xname, i, gop->status);
664 			goto abort;
665 		}
666 
667 		xennetback_tx_response(xneti, req->id, NETIF_RSP_OKAY);
668 
669 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
670 			struct ether_header *eh =
671 			    mtod(m, struct ether_header *);
672 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
673 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
674 			    ETHER_ADDR_LEN) != 0) {
675 				m_freem(m);
676 				continue; /* packet is not for us */
677 			}
678 		}
679 
680 		if (req->flags & NETTXF_csum_blank)
681 			xennet_checksum_fill(ifp, m);
682 		else if (req->flags & NETTXF_data_validated)
683 			m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED;
684 		m_set_rcvif(m, ifp);
685 
686 		if_percpuq_enqueue(ifp->if_percpuq, m);
687 	}
688 
689 	return;
690 
691 abort:
692 	for (; i < queued; i++) {
693 		m = mbufs_sent[i];
694 		req = &xstart_req[i];
695 
696 		m_freem(m);
697 		xennetback_tx_response(xneti, req->id, NETIF_RSP_ERROR);
698 		if_statinc(ifp, if_ierrors);
699 	}
700 }
701 
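/*
 * Event channel interrupt handler: consume the TX requests posted by the
 * frontend (packets the guest transmits, which we receive), queue them as
 * grant copies into local mbufs and process them NB_XMIT_PAGES_BATCH at
 * a time.
 */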
702 static int
703 xennetback_evthandler(void *arg)
704 {
705 	struct xnetback_instance *xneti = arg;
706 	struct ifnet *ifp = &xneti->xni_if;
707 	netif_tx_request_t txreq;
708 	struct mbuf *m;
709 	int receive_pending;
710 	RING_IDX req_cons;
711 	gnttab_copy_t *gop;
712 	paddr_t pa;
713 	int offset, queued = 0;
714 
715 	XENPRINTF(("xennetback_evthandler "));
716 	req_cons = xneti->xni_txring.req_cons;
717 	xen_rmb();
718 	while (1) {
719 		xen_rmb(); /* be sure to read the request before updating */
720 		xneti->xni_txring.req_cons = req_cons;
721 		xen_wmb();
722 		RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring,
723 		    receive_pending);
724 		if (receive_pending == 0)
725 			break;
726 		RING_COPY_REQUEST(&xneti->xni_txring, req_cons, &txreq);
727 		xen_rmb();
728 		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
729 		    txreq.size));
730 		req_cons++;
731 		if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
732 		    (IFF_UP | IFF_RUNNING))) {
733 			/* interface not up, drop */
734 			xennetback_tx_response(xneti, txreq.id,
735 			    NETIF_RSP_DROPPED);
736 			continue;
737 		}
738 
739 		/*
740 		 * Do some sanity checks, and map the packet's page.
741 		 */
742 		const char *msg = xennetback_tx_check_packet(&txreq,
743 		    xneti->xni_ec.ec_capenable & ETHERCAP_VLAN_MTU);
744 		if (msg) {
745 			printf("%s: packet with size %d is %s\n",
746 			    ifp->if_xname, txreq.size, msg);
747 			xennetback_tx_response(xneti, txreq.id,
748 			    NETIF_RSP_ERROR);
749 			if_statinc(ifp, if_ierrors);
750 			continue;
751 		}
752 
753 		/* get a mbuf for this packet */
754 		MGETHDR(m, M_DONTWAIT, MT_DATA);
755 		if (__predict_false(m == NULL)) {
756 			static struct timeval lasttime;
757 			if (ratecheck(&lasttime, &xni_pool_errintvl))
758 				printf("%s: mbuf alloc failed\n",
759 				    ifp->if_xname);
760 			xennetback_tx_response(xneti, txreq.id,
761 			    NETIF_RSP_DROPPED);
762 			if_statinc(ifp, if_ierrors);
763 			continue;
764 		}
765 		if (txreq.size > MHLEN) {
766 			MCLGET(m, M_DONTWAIT);
767 			if (__predict_false(m->m_ext_storage.ext_buf == NULL)) {
768 				m_freem(m);
769 				xennetback_tx_response(xneti, txreq.id,
770 				    NETIF_RSP_DROPPED);
771 				if_statinc(ifp, if_ierrors);
772 				continue;
773 			}
774 		}
775 
776 		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
777 		    xneti->xni_if.if_xname, txreq.offset,
778 		    txreq.size, txreq.id, MASK_NETIF_TX_IDX(req_cons)));
779 
780 		xennetback_mbuf_addr(m, &pa, &offset);
781 
782 		/* Queue for the copy */
783 		gop = &xstart_gop_copy[queued];
784 		memset(gop, 0, sizeof(*gop));
785 		gop->flags = GNTCOPY_source_gref;
786 		gop->len = txreq.size;
787 
788 		gop->source.u.ref = txreq.gref;
789 		gop->source.offset = txreq.offset;
790 		gop->source.domid = xneti->xni_domid;
791 
792 		gop->dest.offset = offset;
793 		gop->dest.domid = DOMID_SELF;
794 		gop->dest.u.gmfn = xpmap_ptom(pa) >> PAGE_SHIFT;
795 
796 		m->m_len = m->m_pkthdr.len = txreq.size;
797 		mbufs_sent[queued] = m;
798 
799 		xstart_req[queued].id = txreq.id;
800 		xstart_req[queued].flags = txreq.flags;
801 
802 		queued++;
803 
804 		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
805 		if (queued == NB_XMIT_PAGES_BATCH) {
806 			xennetback_tx_copy_process(ifp, xneti, queued);
807 			queued = 0;
808 		}
809 	}
810 	if (queued > 0)
811 		xennetback_tx_copy_process(ifp, xneti, queued);
812 	xen_rmb(); /* be sure to read the request before updating pointer */
813 	xneti->xni_txring.req_cons = req_cons;
814 	xen_wmb();
815 
816 	/* check to see if we can transmit more packets */
817 	if_schedule_deferred_start(ifp);
818 
819 	return 1;
820 }
821 
822 static int
823 xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
824 {
825 	//struct xnetback_instance *xneti = ifp->if_softc;
826 	//struct ifreq *ifr = (struct ifreq *)data;
827 	int s, error;
828 
829 	s = splnet();
830 	error = ether_ioctl(ifp, cmd, data);
831 	if (error == ENETRESET)
832 		error = 0;
833 	splx(s);
834 	return error;
835 }
836 
837 static void
838 xennetback_ifstart(struct ifnet *ifp)
839 {
840 	struct xnetback_instance *xneti = ifp->if_softc;
841 
842 	/*
843 	 * The Xen communication channel is much more efficient if we can
844 	 * schedule a batch of packets for the domain.  Deferred start by the
845 	 * network stack will enqueue all pending mbufs in the interface's send
846 	 * queue before it is processed by the soft interrupt handler.
847 	 */
848 	xennetback_ifsoftstart_copy(xneti);
849 }
850 
851 /*
852  * Slightly different from m_dup(); for some reason m_dup() can return
853  * a chain where the data area crosses a page boundary.
854  * That doesn't happen with the function below.
855  */
856 static struct mbuf *
857 xennetback_copymbuf(struct mbuf *m)
858 {
859 	struct mbuf *new_m;
860 
861 	MGETHDR(new_m, M_DONTWAIT, MT_DATA);
862 	if (__predict_false(new_m == NULL)) {
863 		return NULL;
864 	}
865 	if (m->m_pkthdr.len > MHLEN) {
866 		MCLGET(new_m, M_DONTWAIT);
867 		if (__predict_false(
868 		    (new_m->m_flags & M_EXT) == 0)) {
869 			m_freem(new_m);
870 			return NULL;
871 		}
872 	}
873 	m_copydata(m, 0, m->m_pkthdr.len,
874 	    mtod(new_m, void *));
875 	new_m->m_len = new_m->m_pkthdr.len =
876 	    m->m_pkthdr.len;
877 
878 	/*
879 	 * Need to retain csum flags to know if csum was actually computed.
880 	 * This is used to set NETRXF_csum_blank/NETRXF_data_validated.
881 	 */
882 	new_m->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
883 
884 	return new_m;
885 }
886 
887 /* return physical page address and offset of data area of an mbuf */
888 static void
889 xennetback_mbuf_addr(struct mbuf *m, paddr_t *xmit_pa, int *offset)
890 {
891 	switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
892 	case M_EXT|M_EXT_CLUSTER:
893 		KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
894 		*xmit_pa = m->m_ext.ext_paddr;
895 		*offset = m->m_data - m->m_ext.ext_buf;
896 		break;
897 	case 0:
898 		KASSERT(m->m_paddr != M_PADDR_INVALID);
899 		*xmit_pa = m->m_paddr;
900 		*offset = M_BUFOFFSET(m) +
901 		    (m->m_data - M_BUFADDR(m));
902 		break;
903 	default:
904 		if (__predict_false(
905 		    !pmap_extract(pmap_kernel(),
906 		    (vaddr_t)m->m_data, xmit_pa))) {
907 			panic("xennet_start: no pa");
908 		}
909 		*offset = 0;
910 		break;
911 	}
912 	*offset += (*xmit_pa & ~PTE_FRAME);
913 	*xmit_pa = (*xmit_pa & PTE_FRAME);
914 }
915 
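/*
 * Transmit path (host to guest): for each mbuf on the send queue,
 * grant-copy the packet data into the buffer posted by the frontend on
 * the RX ring, queue the matching response and notify the frontend.
 * Packets whose data would cross a page boundary are first copied into
 * a fresh mbuf by xennetback_copymbuf().
 */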
916 static void
917 xennetback_ifsoftstart_copy(struct xnetback_instance *xneti)
918 {
919 	struct ifnet *ifp = &xneti->xni_if;
920 	struct mbuf *m, *new_m;
921 	paddr_t xmit_pa;
922 	paddr_t xmit_ma;
923 	int i, j;
924 	netif_rx_response_t *rxresp;
925 	netif_rx_request_t rxreq;
926 	RING_IDX req_prod, resp_prod;
927 	int do_event = 0;
928 	gnttab_copy_t *gop;
929 	int id, offset;
930 	bool abort;
931 
932 	XENPRINTF(("xennetback_ifsoftstart_copy "));
933 	int s = splnet();
934 	if (__predict_false((ifp->if_flags & IFF_RUNNING) == 0)) {
935 		splx(s);
936 		return;
937 	}
938 
939 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
940 		XENPRINTF(("pkt\n"));
941 		req_prod = xneti->xni_rxring.sring->req_prod;
942 		resp_prod = xneti->xni_rxring.rsp_prod_pvt;
943 		xen_rmb();
944 
945 		gop = xstart_gop_copy;
946 		abort = false;
947 		for (i = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
948 			XENPRINTF(("have a packet\n"));
949 			IFQ_POLL(&ifp->if_snd, m);
950 			if (__predict_false(m == NULL))
951 				panic("xennetback_ifstart: IFQ_POLL");
952 			if (__predict_false(
953 			    req_prod == xneti->xni_rxring.req_cons ||
954 			    xneti->xni_rxring.req_cons - resp_prod ==
955 			    NET_RX_RING_SIZE)) {
956 				/* out of ring space */
957 				XENPRINTF(("xennetback_ifstart: ring full "
958 				    "req_prod 0x%x req_cons 0x%x resp_prod "
959 				    "0x%x\n",
960 				    req_prod, xneti->xni_rxring.req_cons,
961 				    resp_prod));
962 				abort = true;
963 				break;
964 			}
965 			if (__predict_false(i == NB_XMIT_PAGES_BATCH))
966 				break; /* we filled the array */
967 
968 			xennetback_mbuf_addr(m, &xmit_pa, &offset);
969 			if (m->m_pkthdr.len != m->m_len ||
970 			    (offset + m->m_pkthdr.len) > PAGE_SIZE) {
971 				new_m = xennetback_copymbuf(m);
972 				if (__predict_false(new_m == NULL)) {
973 					static struct timeval lasttime;
974 					if (ratecheck(&lasttime, &xni_pool_errintvl))
975 						printf("%s: cannot allocate new mbuf\n",
976 						    ifp->if_xname);
977 					abort = true;
978 					break;
979 				} else {
980 					IFQ_DEQUEUE(&ifp->if_snd, m);
981 					m_freem(m);
982 					m = new_m;
983 					xennetback_mbuf_addr(m,
984 					    &xmit_pa, &offset);
985 				}
986 			} else {
987 				IFQ_DEQUEUE(&ifp->if_snd, m);
988 			}
989 
990 			KASSERT(xmit_pa != POOL_PADDR_INVALID);
991 			KASSERT((offset + m->m_pkthdr.len) <= PAGE_SIZE);
992 			xmit_ma = xpmap_ptom(xmit_pa);
993 			/* start filling ring */
994 			gop->flags = GNTCOPY_dest_gref;
995 			gop->source.offset = offset;
996 			gop->source.domid = DOMID_SELF;
997 			gop->source.u.gmfn = xmit_ma >> PAGE_SHIFT;
998 
999 			RING_COPY_REQUEST(&xneti->xni_rxring,
1000 			    xneti->xni_rxring.req_cons, &rxreq);
1001 			gop->dest.u.ref = rxreq.gref;
1002 			gop->dest.offset = 0;
1003 			gop->dest.domid = xneti->xni_domid;
1004 
1005 			gop->len = m->m_pkthdr.len;
1006 			gop++;
1007 
1008 			id = rxreq.id;
1009 			xen_rmb();
1010 			xneti->xni_rxring.req_cons++;
1011 			rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
1012 			    resp_prod);
1013 			rxresp->id = id;
1014 			rxresp->offset = 0;
1015 			rxresp->status = m->m_pkthdr.len;
1016 			if ((m->m_pkthdr.csum_flags &
1017 			    XN_M_CSUM_SUPPORTED) != 0) {
1018 				rxresp->flags = NETRXF_csum_blank;
1019 			} else {
1020 				rxresp->flags = NETRXF_data_validated;
1021 			}
1022 
1023 			mbufs_sent[i] = m;
1024 			resp_prod++;
1025 			i++; /* this packet has been queued */
1026 			if_statinc(ifp, if_opackets);
1027 			bpf_mtap(ifp, m, BPF_D_OUT);
1028 		}
1029 		if (i != 0) {
1030 			if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
1031 			    xstart_gop_copy, i) != 0) {
1032 				panic("%s: GNTTABOP_copy failed",
1033 				    ifp->if_xname);
1034 			}
1035 
1036 			for (j = 0; j < i; j++) {
1037 				if (xstart_gop_copy[j].status != GNTST_okay) {
1038 					printf("%s GNTTABOP_copy[%d] %d\n",
1039 					    ifp->if_xname,
1040 					    j, xstart_gop_copy[j].status);
1041 					printf("%s: req_prod %u req_cons "
1042 					    "%u rsp_prod %u rsp_prod_pvt %u "
1043 					    "i %d\n",
1044 					    ifp->if_xname,
1045 					    xneti->xni_rxring.sring->req_prod,
1046 					    xneti->xni_rxring.req_cons,
1047 					    xneti->xni_rxring.sring->rsp_prod,
1048 					    xneti->xni_rxring.rsp_prod_pvt,
1049 					    i);
1050 					rxresp = RING_GET_RESPONSE(
1051 					    &xneti->xni_rxring,
1052 					    xneti->xni_rxring.rsp_prod_pvt + j);
1053 					rxresp->status = NETIF_RSP_ERROR;
1054 				}
1055 			}
1056 
1057 			/* update pointer */
1058 			KASSERT(
1059 			    xneti->xni_rxring.rsp_prod_pvt + i == resp_prod);
1060 			xneti->xni_rxring.rsp_prod_pvt = resp_prod;
1061 			RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
1062 			    &xneti->xni_rxring, j);
1063 			if (j)
1064 				do_event = 1;
1065 			/* now we can free the mbufs */
1066 			for (j = 0; j < i; j++) {
1067 				m_freem(mbufs_sent[j]);
1068 			}
1069 		}
1070 		/* send event */
1071 		if (do_event) {
1072 			xen_rmb();
1073 			XENPRINTF(("%s receive event\n",
1074 			    xneti->xni_if.if_xname));
1075 			hypervisor_notify_via_evtchn(xneti->xni_evtchn);
1076 			do_event = 0;
1077 		}
1078 		/*
1079 		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
1080 		 * here, as the frontend doesn't notify when adding
1081 		 * requests anyway
1082 		 */
1083 		if (__predict_false(abort ||
1084 		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
1085 			/* ring full */
1086 			ifp->if_timer = 1;
1087 			break;
1088 		}
1089 	}
1090 	splx(s);
1091 }
1092 
1093 static void
1094 xennetback_ifwatchdog(struct ifnet * ifp)
1095 {
1096 	/*
1097 	 * We can get into the following condition: transmit stalls because the
1098 	 * ring is full while the ifq is full too.
1099 	 *
1100 	 * In this case (as, unfortunately, we don't get an interrupt from xen
1101 	 * on transmit) nothing will ever call xennetback_ifstart() again.
1102 	 * Here we abuse the watchdog to get out of this condition.
1103 	 */
1104 	XENPRINTF(("xennetback_ifwatchdog\n"));
1105 	xennetback_ifstart(ifp);
1106 }
1107 
1108 static int
1109 xennetback_ifinit(struct ifnet *ifp)
1110 {
1111 	struct xnetback_instance *xneti = ifp->if_softc;
1112 	int s = splnet();
1113 
1114 	if ((ifp->if_flags & IFF_UP) == 0) {
1115 		splx(s);
1116 		return 0;
1117 	}
1118 	if (xneti->xni_status == CONNECTED)
1119 		ifp->if_flags |= IFF_RUNNING;
1120 	splx(s);
1121 	return 0;
1122 }
1123 
1124 static void
1125 xennetback_ifstop(struct ifnet *ifp, int disable)
1126 {
1127 	struct xnetback_instance *xneti = ifp->if_softc;
1128 	int s = splnet();
1129 
1130 	ifp->if_flags &= ~IFF_RUNNING;
1131 	ifp->if_timer = 0;
1132 	if (xneti->xni_status == CONNECTED) {
1133 		XENPRINTF(("%s: req_prod 0x%x rsp_prod 0x%x req_cons 0x%x "
1134 		    "rsp_event 0x%x\n", ifp->if_xname,
1135 		    xneti->xni_txring.sring->req_prod, xneti->xni_txring.sring->rsp_prod,
1136 		    xneti->xni_txring.req_cons, xneti->xni_txring.sring->rsp_event));
1137 		xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
1138 	}
1139 	splx(s);
1140 }
1141