1 /*      $NetBSD: xennetback_xenbus.c,v 1.112 2023/02/25 00:37:47 riastradh Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.112 2023/02/25 00:37:47 riastradh Exp $");
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kmem.h>
34 #include <sys/queue.h>
35 #include <sys/kernel.h>
36 #include <sys/mbuf.h>
37 #include <sys/protosw.h>
38 #include <sys/socket.h>
39 #include <sys/ioctl.h>
40 #include <sys/errno.h>
41 #include <sys/device.h>
42 
43 #include <net/if.h>
44 #include <net/if_types.h>
45 #include <net/if_dl.h>
46 #include <net/route.h>
47 #include <net/bpf.h>
48 
49 #include <net/if_ether.h>
50 
51 #include <xen/intr.h>
52 #include <xen/hypervisor.h>
53 #include <xen/xen.h>
54 #include <xen/xen_shm.h>
55 #include <xen/evtchn.h>
56 #include <xen/xenbus.h>
57 #include <xen/xennet_checksum.h>
58 
59 #include <uvm/uvm.h>
60 
61 /*
62  * Backend network device driver for Xen.
63  */
64 
65 #ifdef XENDEBUG_NET
66 #define XENPRINTF(x) printf x
67 #else
68 #define XENPRINTF(x)
69 #endif
70 
71 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
72 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
73 
74 /*
75  * Number of packets to transmit in one hypercall (= number of pages to
76  * transmit at once).
77  */
78 #define NB_XMIT_PAGES_BATCH 64
79 CTASSERT(NB_XMIT_PAGES_BATCH >= XEN_NETIF_NR_SLOTS_MIN);
80 
81 /* ratecheck(9) for pool allocation failures */
82 static const struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */
83 
84 /* state of a xnetback instance */
85 typedef enum {
86 	CONNECTED,
87 	DISCONNECTING,
88 	DISCONNECTED
89 } xnetback_state_t;
90 
91 struct xnetback_xstate {
92 	bus_dmamap_t xs_dmamap;
93 	bool xs_loaded;
94 	struct mbuf *xs_m;
95 	struct netif_tx_request xs_tx;
96 	uint16_t xs_tx_size;		/* Size of data in this Tx fragment */
97 };
98 
99 /* we keep the xnetback instances in a linked list */
100 struct xnetback_instance {
101 	SLIST_ENTRY(xnetback_instance) next;
102 	struct xenbus_device *xni_xbusd; /* our xenstore entry */
103 	domid_t xni_domid;		/* attached to this domain */
104 	uint32_t xni_handle;	/* domain-specific handle */
105 	xnetback_state_t xni_status;
106 
107 	/* network interface stuff */
108 	struct ethercom xni_ec;
109 	struct callout xni_restart;
110 	uint8_t xni_enaddr[ETHER_ADDR_LEN];
111 
112 	/* remote domain communication stuff */
113 	unsigned int xni_evtchn; /* our event channel */
114 	struct intrhand *xni_ih;
115 	netif_tx_back_ring_t xni_txring;
116 	netif_rx_back_ring_t xni_rxring;
117 	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
118 	grant_handle_t xni_rx_ring_handle;
119 	vaddr_t xni_tx_ring_va; /* to unmap the ring */
120 	vaddr_t xni_rx_ring_va;
121 
122 	/* arrays used in xennetback_ifstart(), used for both Rx and Tx */
123 	gnttab_copy_t     	xni_gop_copy[NB_XMIT_PAGES_BATCH];
124 	struct xnetback_xstate	xni_xstate[NB_XMIT_PAGES_BATCH];
125 
126 	/* event counters */
127 	struct evcnt xni_cnt_rx_cksum_blank;
128 	struct evcnt xni_cnt_rx_cksum_undefer;
129 };
130 #define xni_if    xni_ec.ec_if
131 #define xni_bpf   xni_if.if_bpf
132 
133        void xvifattach(int);
134 static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
135 static void xennetback_ifstart(struct ifnet *);
136 static void xennetback_ifsoftstart_copy(struct xnetback_instance *);
137 static void xennetback_ifwatchdog(struct ifnet *);
138 static int  xennetback_ifinit(struct ifnet *);
139 static void xennetback_ifstop(struct ifnet *, int);
140 
141 static int  xennetback_xenbus_create(struct xenbus_device *);
142 static int  xennetback_xenbus_destroy(void *);
143 static void xennetback_frontend_changed(void *, XenbusState);
144 
145 static inline void xennetback_tx_response(struct xnetback_instance *,
146     int, int);
147 
148 static SLIST_HEAD(, xnetback_instance) xnetback_instances;
149 static kmutex_t xnetback_lock;
150 
151 static bool xnetif_lookup(domid_t, uint32_t);
152 static int  xennetback_evthandler(void *);
153 
154 static struct xenbus_backend_driver xvif_backend_driver = {
155 	.xbakd_create = xennetback_xenbus_create,
156 	.xbakd_type = "vif"
157 };
158 
159 void
160 xvifattach(int n)
161 {
162 	XENPRINTF(("xennetback_init\n"));
163 
164 	SLIST_INIT(&xnetback_instances);
165 	mutex_init(&xnetback_lock, MUTEX_DEFAULT, IPL_NONE);
166 
167 	xenbus_backend_register(&xvif_backend_driver);
168 }
169 
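/*
 * Backend creation hook, called when a new "vif" entry shows up in
 * xenstore.  Read frontend-id and handle, allocate and register the
 * xnetback instance, set up the xvif pseudo-interface and its DMA maps,
 * derive a local MAC address from the frontend's one, advertise the
 * supported features in xenstore and switch to XenbusStateInitWait.
 */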
170 static int
171 xennetback_xenbus_create(struct xenbus_device *xbusd)
172 {
173 	struct xnetback_instance *xneti;
174 	long domid, handle;
175 	struct ifnet *ifp;
176 	extern int ifqmaxlen; /* XXX */
177 	char *e, *p;
178 	char mac[32];
179 	int i, err;
180 	struct xenbus_transaction *xbt;
181 
182 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
183 	    "frontend-id", &domid, 10)) != 0) {
184 		aprint_error("xvif: can't read %s/frontend-id: %d\n",
185 		    xbusd->xbusd_path, err);
186 		return err;
187 	}
188 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
189 	    "handle", &handle, 10)) != 0) {
190 		aprint_error("xvif: can't read %s/handle: %d\n",
191 		    xbusd->xbusd_path, err);
192 		return err;
193 	}
194 
195 	xneti = kmem_zalloc(sizeof(*xneti), KM_SLEEP);
196 	xneti->xni_domid = domid;
197 	xneti->xni_handle = handle;
198 	xneti->xni_status = DISCONNECTED;
199 
200 	/* Need to keep the lock for lookup and the list update */
201 	mutex_enter(&xnetback_lock);
202 	if (xnetif_lookup(domid, handle)) {
203 		mutex_exit(&xnetback_lock);
204 		kmem_free(xneti, sizeof(*xneti));
205 		return EEXIST;
206 	}
207 	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
208 	mutex_exit(&xnetback_lock);
209 
210 	xbusd->xbusd_u.b.b_cookie = xneti;
211 	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
212 	xneti->xni_xbusd = xbusd;
213 
214 	ifp = &xneti->xni_if;
215 	ifp->if_softc = xneti;
216 	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
217 	    (int)domid, (int)handle);
218 
219 	/* read mac address */
220 	err = xenbus_read(NULL, xbusd->xbusd_path, "mac", mac, sizeof(mac));
221 	if (err) {
222 		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
223 		    xbusd->xbusd_path, err);
224 		goto fail;
225 	}
226 	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
227 		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
228 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
229 			aprint_error_ifnet(ifp,
230 			    "%s is not a valid mac address\n", mac);
231 			err = EINVAL;
232 			goto fail;
233 		}
234 		p = &e[1];
235 	}
236 
237 	/* we can't use the same MAC addr as our guest */
238 	xneti->xni_enaddr[3]++;
239 
240 	/* Initialize DMA map, used only for loading PA */
241 	for (i = 0; i < __arraycount(xneti->xni_xstate); i++) {
242 		if (bus_dmamap_create(xneti->xni_xbusd->xbusd_dmat,
243 		    ETHER_MAX_LEN_JUMBO, XEN_NETIF_NR_SLOTS_MIN,
244 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
245 		    &xneti->xni_xstate[i].xs_dmamap)
246 		    != 0) {
247 			aprint_error_ifnet(ifp,
248 			    "failed to allocate dma map\n");
249 			err = ENOMEM;
250 			goto fail;
251 		}
252 	}
253 
254 	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
255 	    NULL, ifp->if_xname, "Rx csum blank");
256 	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
257 	    NULL, ifp->if_xname, "Rx csum undeferred");
258 
259 	/* create pseudo-interface */
260 	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
261 	    ether_sprintf(xneti->xni_enaddr));
262 	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
263 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
264 	ifp->if_snd.ifq_maxlen =
265 	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
266 	ifp->if_capabilities =
267 		IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
268 		| IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
269 		| IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx
270 		| IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx;
271 #define XN_M_CSUM_SUPPORTED						\
272 	(M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)
273 
274 	ifp->if_ioctl = xennetback_ifioctl;
275 	ifp->if_start = xennetback_ifstart;
276 	ifp->if_watchdog = xennetback_ifwatchdog;
277 	ifp->if_init = xennetback_ifinit;
278 	ifp->if_stop = xennetback_ifstop;
279 	ifp->if_timer = 0;
280 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
281 	IFQ_SET_READY(&ifp->if_snd);
282 	if_attach(ifp);
283 	if_deferred_start_init(ifp, NULL);
284 	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);
285 
286 	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;
287 
288 	do {
289 		xbt = xenbus_transaction_start();
290 		if (xbt == NULL) {
291 			aprint_error_ifnet(ifp,
292 			    "%s: can't start transaction\n",
293 			    xbusd->xbusd_path);
294 			goto fail;
295 		}
296 		err = xenbus_printf(xbt, xbusd->xbusd_path,
297 		    "vifname", "%s", ifp->if_xname);
298 		if (err) {
299 			aprint_error_ifnet(ifp,
300 			    "failed to write %s/vifname: %d\n",
301 			    xbusd->xbusd_path, err);
302 			goto abort_xbt;
303 		}
304 		err = xenbus_printf(xbt, xbusd->xbusd_path,
305 		    "feature-rx-copy", "%d", 1);
306 		if (err) {
307 			aprint_error_ifnet(ifp,
308 			    "failed to write %s/feature-rx-copy: %d\n",
309 			    xbusd->xbusd_path, err);
310 			goto abort_xbt;
311 		}
312 		err = xenbus_printf(xbt, xbusd->xbusd_path,
313 		    "feature-ipv6-csum-offload", "%d", 1);
314 		if (err) {
315 			aprint_error_ifnet(ifp,
316 			    "failed to write %s/feature-ipv6-csum-offload: %d\n",
317 			    xbusd->xbusd_path, err);
318 			goto abort_xbt;
319 		}
320 		err = xenbus_printf(xbt, xbusd->xbusd_path,
321 		    "feature-sg", "%d", 1);
322 		if (err) {
323 			aprint_error_ifnet(ifp,
324 			    "failed to write %s/feature-sg: %d\n",
325 			    xbusd->xbusd_path, err);
326 			goto abort_xbt;
327 		}
328 	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
329 	if (err) {
330 		aprint_error_ifnet(ifp,
331 		    "%s: can't end transaction: %d\n",
332 		    xbusd->xbusd_path, err);
333 	}
334 
335 	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
336 	if (err) {
337 		aprint_error_ifnet(ifp,
338 		    "failed to switch state on %s: %d\n",
339 		    xbusd->xbusd_path, err);
340 		goto fail;
341 	}
342 	return 0;
343 
344 abort_xbt:
345 	xenbus_transaction_end(xbt, 1);
346 fail:
347 	kmem_free(xneti, sizeof(*xneti));
348 	return err;
349 }
350 
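/*
 * Backend detach hook: tear the instance down.  Disestablish the event
 * channel interrupt, remove the instance from the global list, detach
 * the network interface, destroy the DMA maps, unmap and free the
 * shared ring pages, and finally free the instance itself.
 */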
351 int
352 xennetback_xenbus_destroy(void *arg)
353 {
354 	struct xnetback_instance *xneti = arg;
355 
356 	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");
357 
358 	if (xneti->xni_ih != NULL) {
359 		hypervisor_mask_event(xneti->xni_evtchn);
360 		xen_intr_disestablish(xneti->xni_ih);
361 		xneti->xni_ih = NULL;
362 	}
363 
364 	mutex_enter(&xnetback_lock);
365 	SLIST_REMOVE(&xnetback_instances,
366 	    xneti, xnetback_instance, next);
367 	mutex_exit(&xnetback_lock);
368 
369 	ether_ifdetach(&xneti->xni_if);
370 	if_detach(&xneti->xni_if);
371 
372 	evcnt_detach(&xneti->xni_cnt_rx_cksum_blank);
373 	evcnt_detach(&xneti->xni_cnt_rx_cksum_undefer);
374 
375 	/* Destroy DMA maps */
376 	for (int i = 0; i < __arraycount(xneti->xni_xstate); i++) {
377 		if (xneti->xni_xstate[i].xs_dmamap != NULL) {
378 			bus_dmamap_destroy(xneti->xni_xbusd->xbusd_dmat,
379 			    xneti->xni_xstate[i].xs_dmamap);
380 			xneti->xni_xstate[i].xs_dmamap = NULL;
381 		}
382 	}
383 
384 	if (xneti->xni_txring.sring) {
385 		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
386 		    &xneti->xni_tx_ring_handle);
387 	}
388 	if (xneti->xni_rxring.sring) {
389 		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
390 		    &xneti->xni_rx_ring_handle);
391 	}
392 	if (xneti->xni_tx_ring_va != 0) {
393 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
394 		    PAGE_SIZE, UVM_KMF_VAONLY);
395 		xneti->xni_tx_ring_va = 0;
396 	}
397 	if (xneti->xni_rx_ring_va != 0) {
398 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
399 		    PAGE_SIZE, UVM_KMF_VAONLY);
400 		xneti->xni_rx_ring_va = 0;
401 	}
402 	kmem_free(xneti, sizeof(*xneti));
403 	return 0;
404 }
405 
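/*
 * Connect to the frontend: read the ring references and event channel
 * from the frontend's xenstore directory, map both shared rings into
 * kernel VA space, bind the interdomain event channel and establish
 * the interrupt handler.  Returns 0 on success, -1 on failure (with
 * any partially set up resources released).
 */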
406 static int
407 xennetback_connect(struct xnetback_instance *xneti)
408 {
409 	int err;
410 	netif_tx_sring_t *tx_ring;
411 	netif_rx_sring_t *rx_ring;
412 	evtchn_op_t evop;
413 	u_long tx_ring_ref, rx_ring_ref;
414 	grant_ref_t gtx_ring_ref, grx_ring_ref;
415 	u_long revtchn, rx_copy;
416 	struct xenbus_device *xbusd = xneti->xni_xbusd;
417 
418 	/* read communication information */
419 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
420 	    "tx-ring-ref", &tx_ring_ref, 10);
421 	if (err) {
422 		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
423 		    xbusd->xbusd_otherend);
424 		return -1;
425 	}
426 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
427 	    "rx-ring-ref", &rx_ring_ref, 10);
428 	if (err) {
429 		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
430 		    xbusd->xbusd_otherend);
431 		return -1;
432 	}
433 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
434 	    "event-channel", &revtchn, 10);
435 	if (err) {
436 		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
437 		    xbusd->xbusd_otherend);
438 		return -1;
439 	}
440 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
441 	    "request-rx-copy", &rx_copy, 10);
442 	if (err == ENOENT || !rx_copy) {
443 		xenbus_dev_fatal(xbusd, err,
444 		    "%s/request-rx-copy not supported by frontend",
445 		    xbusd->xbusd_otherend);
446 		return -1;
447 	} else if (err) {
448 		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
449 		    xbusd->xbusd_otherend);
450 		return -1;
451 	}
452 
453 	/* allocate VA space and map rings */
454 	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
455 	    UVM_KMF_VAONLY);
456 	if (xneti->xni_tx_ring_va == 0) {
457 		xenbus_dev_fatal(xbusd, ENOMEM,
458 		    "can't get VA for TX ring", xbusd->xbusd_otherend);
459 		goto err1;
460 	}
461 	tx_ring = (void *)xneti->xni_tx_ring_va;
462 
463 	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
464 	    UVM_KMF_VAONLY);
465 	if (xneti->xni_rx_ring_va == 0) {
466 		xenbus_dev_fatal(xbusd, ENOMEM,
467 		    "can't get VA for RX ring", xbusd->xbusd_otherend);
468 		goto err1;
469 	}
470 	rx_ring = (void *)xneti->xni_rx_ring_va;
471 
472 	gtx_ring_ref = tx_ring_ref;
473         if (xen_shm_map(1, xneti->xni_domid, &gtx_ring_ref,
474 	    xneti->xni_tx_ring_va, &xneti->xni_tx_ring_handle, 0) != 0) {
475 		aprint_error_ifnet(&xneti->xni_if,
476 		    "can't map TX grant ref\n");
477 		goto err2;
478 	}
479 	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
480 
481 	grx_ring_ref = rx_ring_ref;
482         if (xen_shm_map(1, xneti->xni_domid, &grx_ring_ref,
483 	    xneti->xni_rx_ring_va, &xneti->xni_rx_ring_handle, 0) != 0) {
484 		aprint_error_ifnet(&xneti->xni_if,
485 		    "can't map RX grant ref\n");
486 		goto err2;
487 	}
488 	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
489 
490 	evop.cmd = EVTCHNOP_bind_interdomain;
491 	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
492 	evop.u.bind_interdomain.remote_port = revtchn;
493 	err = HYPERVISOR_event_channel_op(&evop);
494 	if (err) {
495 		aprint_error_ifnet(&xneti->xni_if,
496 		    "can't get event channel: %d\n", err);
497 		goto err2;
498 	}
499 	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
500 	xneti->xni_status = CONNECTED;
501 
502 	xneti->xni_ih = xen_intr_establish_xname(-1, &xen_pic,
503 	    xneti->xni_evtchn, IST_LEVEL, IPL_NET, xennetback_evthandler,
504 	    xneti, false, xneti->xni_if.if_xname);
505 	KASSERT(xneti->xni_ih != NULL);
506 	xennetback_ifinit(&xneti->xni_if);
507 	hypervisor_unmask_event(xneti->xni_evtchn);
508 	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
509 	return 0;
510 
511 err2:
512 	/* unmap rings */
513 	if (xneti->xni_tx_ring_handle != 0) {
514 		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
515 		    &xneti->xni_tx_ring_handle);
516 	}
517 
518 	if (xneti->xni_rx_ring_handle != 0) {
519 		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
520 		    &xneti->xni_rx_ring_handle);
521 	}
522 err1:
523 	/* free rings VA space */
524 	if (xneti->xni_rx_ring_va != 0)
525 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
526 		    PAGE_SIZE, UVM_KMF_VAONLY);
527 
528 	if (xneti->xni_tx_ring_va != 0)
529 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
530 		    PAGE_SIZE, UVM_KMF_VAONLY);
531 
532 	return -1;
533 
534 }
535 
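/*
 * React to frontend state changes: connect when the frontend reaches
 * Connected, and start disconnecting when it goes to Closing.
 */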
536 static void
537 xennetback_frontend_changed(void *arg, XenbusState new_state)
538 {
539 	struct xnetback_instance *xneti = arg;
540 	struct xenbus_device *xbusd = xneti->xni_xbusd;
541 
542 	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
543 	switch(new_state) {
544 	case XenbusStateInitialising:
545 	case XenbusStateInitialised:
546 		break;
547 
548 	case XenbusStateConnected:
549 		if (xneti->xni_status == CONNECTED)
550 			break;
551 		if (xennetback_connect(xneti) == 0)
552 			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
553 		break;
554 
555 	case XenbusStateClosing:
556 		xneti->xni_status = DISCONNECTING;
557 		xneti->xni_if.if_flags &= ~IFF_RUNNING;
558 		xneti->xni_if.if_timer = 0;
559 		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
560 		break;
561 
562 	case XenbusStateClosed:
563 		/* otherend_changed() should handle it for us */
564 		panic("xennetback_frontend_changed: closed\n");
565 	case XenbusStateUnknown:
566 	case XenbusStateInitWait:
567 	default:
568 		aprint_error("%s: invalid frontend state %d\n",
569 		    xneti->xni_if.if_xname, new_state);
570 		break;
571 	}
572 	return;
573 
574 }
575 
576 /* lookup a xneti based on domain id and interface handle */
577 static bool
578 xnetif_lookup(domid_t dom, uint32_t handle)
579 {
580 	struct xnetback_instance *xneti;
581 	bool found = false;
582 
583 	KASSERT(mutex_owned(&xnetback_lock));
584 
585 	SLIST_FOREACH(xneti, &xnetback_instances, next) {
586 		if (xneti->xni_domid == dom && xneti->xni_handle == handle) {
587 			found = true;
588 			break;
589 		}
590 	}
591 
592 	return found;
593 }
594 
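/*
 * Queue a response for the given transmit request id on the TX ring,
 * and notify the frontend over the event channel if it asked for it.
 */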
595 static inline void
596 xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
597 {
598 	RING_IDX resp_prod;
599 	netif_tx_response_t *txresp;
600 	int do_event;
601 
602 	resp_prod = xneti->xni_txring.rsp_prod_pvt;
603 	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);
604 
605 	txresp->id = id;
606 	txresp->status = status;
607 	xneti->xni_txring.rsp_prod_pvt++;
608 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
609 	if (do_event) {
610 		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
611 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
612 	}
613 }
614 
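/*
 * Sanity-check a single transmit request; return a short description
 * of the problem, or NULL if the request looks acceptable.
 */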
615 static const char *
616 xennetback_tx_check_packet(const netif_tx_request_t *txreq)
617 {
618 	if (__predict_false((txreq->flags & NETTXF_more_data) == 0 &&
619 	    txreq->offset + txreq->size > PAGE_SIZE))
620 		return "crossing page boundary";
621 
622 	if (__predict_false(txreq->size > ETHER_MAX_LEN_JUMBO))
623 		return "bigger than jumbo";
624 
625 	return NULL;
626 }
627 
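/*
 * Issue the queued grant-copy operations in a single GNTTABOP_copy
 * hypercall and check the per-operation status.  Used for both the
 * Tx and Rx paths; "dir" is only used in error messages.
 */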
628 static int
629 xennetback_copy(struct ifnet *ifp, gnttab_copy_t *gop, int copycnt,
630     const char *dir)
631 {
632 	/*
633 	 * Copy the data and ack it. Delaying it until the mbuf is
634 	 * freed will stall transmit.
635 	 */
636 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, gop, copycnt) != 0) {
637 		printf("%s: GNTTABOP_copy %s failed", ifp->if_xname, dir);
638 		return EINVAL;
639 	}
640 
641 	for (int i = 0; i < copycnt; i++, gop++) {
642 		if (gop->status != GNTST_okay) {
643 			printf("%s GNTTABOP_copy[%d] %s %d\n",
644 			    ifp->if_xname, i, dir, gop->status);
645 			return EINVAL;
646 		}
647 	}
648 
649 	return 0;
650 }
651 
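/*
 * Abort a partially queued batch of transmit requests: unload the DMA
 * maps, free the mbufs and answer each request with NETIF_RSP_ERROR.
 */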
652 static void
653 xennetback_tx_copy_abort(struct ifnet *ifp, struct xnetback_instance *xneti,
654 	int queued)
655 {
656 	struct xnetback_xstate *xst;
657 
658 	for (int i = 0; i < queued; i++) {
659 		xst = &xneti->xni_xstate[i];
660 
661 		if (xst->xs_loaded) {
662 			KASSERT(xst->xs_m != NULL);
663 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
664 			    xst->xs_dmamap);
665 			xst->xs_loaded = false;
666 			m_freem(xst->xs_m);
667 		}
668 
669 		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_ERROR);
670 		if_statinc(ifp, if_ierrors);
671 	}
672 }
673 
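/*
 * Process a batch of queued transmit requests: load each mbuf into its
 * DMA map and build grant-copy operations that pull the fragment data
 * from the guest's granted pages into the mbuf storage, flushing the
 * copy batch whenever it fills up.  On success each request is
 * acknowledged and the completed packets are passed up the stack, with
 * checksum flags set according to the request flags.
 */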
674 static void
675 xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
676 	int queued)
677 {
678 	gnttab_copy_t *gop;
679 	struct xnetback_xstate *xst;
680 	int copycnt = 0, seg = 0;
681 	size_t goff = 0, segoff = 0, gsize, take;
682 	bus_dmamap_t dm = NULL;
683 	paddr_t ma;
684 
685 	for (int i = 0; i < queued; i++) {
686 		xst = &xneti->xni_xstate[i];
687 
688 		if (xst->xs_m != NULL) {
689 			KASSERT(xst->xs_m->m_pkthdr.len == xst->xs_tx.size);
690 			if (__predict_false(bus_dmamap_load_mbuf(
691 			    xneti->xni_xbusd->xbusd_dmat,
692 			    xst->xs_dmamap, xst->xs_m, BUS_DMA_NOWAIT) != 0))
693 				goto abort;
694 			xst->xs_loaded = true;
695 			dm = xst->xs_dmamap;
696 			seg = 0;
697 			goff = segoff = 0;
698 		}
699 
700 		gsize = xst->xs_tx_size;
701 		goff = 0;
702 		for (; seg < dm->dm_nsegs && gsize > 0; seg++) {
703 			bus_dma_segment_t *ds = &dm->dm_segs[seg];
704 			ma = ds->ds_addr;
705 			take = uimin(gsize, ds->ds_len);
706 
707 			KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
708 			if (copycnt == NB_XMIT_PAGES_BATCH) {
709 				if (xennetback_copy(ifp, xneti->xni_gop_copy,
710 				    copycnt, "Tx") != 0)
711 					goto abort;
712 				copycnt = 0;
713 			}
714 
715 			/* Queue for the copy */
716 			gop = &xneti->xni_gop_copy[copycnt++];
717 			memset(gop, 0, sizeof(*gop));
718 			gop->flags = GNTCOPY_source_gref;
719 			gop->len = take;
720 
721 			gop->source.u.ref = xst->xs_tx.gref;
722 			gop->source.offset = xst->xs_tx.offset + goff;
723 			gop->source.domid = xneti->xni_domid;
724 
725 			gop->dest.offset = (ma & PAGE_MASK) + segoff;
726 			KASSERT(gop->dest.offset <= PAGE_SIZE);
727 			gop->dest.domid = DOMID_SELF;
728 			gop->dest.u.gmfn = ma >> PAGE_SHIFT;
729 
730 			goff += take;
731 			gsize -= take;
732 			if (take + segoff < ds->ds_len) {
733 				segoff += take;
734 				/* Segment not completely consumed yet */
735 				break;
736 			}
737 			segoff = 0;
738 		}
739 		KASSERT(gsize == 0);
740 		KASSERT(goff == xst->xs_tx_size);
741 	}
742 	if (copycnt > 0) {
743 		if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Tx"))
744 			goto abort;
745 		copycnt = 0;
746 	}
747 
748 	/* If we got here, the whole copy was successful */
749 	for (int i = 0; i < queued; i++) {
750 		xst = &xneti->xni_xstate[i];
751 
752 		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_OKAY);
753 
754 		if (xst->xs_m != NULL) {
755 			KASSERT(xst->xs_loaded);
756 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
757 			    xst->xs_dmamap);
758 
759 			if (xst->xs_tx.flags & NETTXF_csum_blank) {
760 				xennet_checksum_fill(ifp, xst->xs_m,
761 				    &xneti->xni_cnt_rx_cksum_blank,
762 				    &xneti->xni_cnt_rx_cksum_undefer);
763 			} else if (xst->xs_tx.flags & NETTXF_data_validated) {
764 				xst->xs_m->m_pkthdr.csum_flags =
765 				    XN_M_CSUM_SUPPORTED;
766 			}
767 			m_set_rcvif(xst->xs_m, ifp);
768 
769 			if_percpuq_enqueue(ifp->if_percpuq, xst->xs_m);
770 		}
771 	}
772 
773 	return;
774 
775 abort:
776 	xennetback_tx_copy_abort(ifp, xneti, queued);
777 }
778 
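/*
 * The first request of a multi-fragment packet carries the total packet
 * size.  Walk the remaining requests of the chain to compute the size
 * of the first fragment and, via *cntp, the number of fragments.
 */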
779 static int
780 xennetback_tx_m0len_fragment(struct xnetback_instance *xneti,
781     int m0_len, int req_cons, int *cntp)
782 {
783 	netif_tx_request_t *txreq;
784 
785 	/* This assumes all the requests are already pushed into the ring */
786 	*cntp = 1;
787 	do {
788 		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
789 		KASSERT(m0_len > txreq->size);
790 		m0_len -= txreq->size;
791 		req_cons++;
792 		(*cntp)++;
793 	} while (txreq->flags & NETTXF_more_data);
794 
795 	return m0_len;
796 }
797 
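/*
 * Event channel interrupt handler: the guest transmit path.  Consume
 * transmit requests from the TX ring, allocate mbufs for them (chaining
 * the fragments of multi-fragment packets), and hand them over to
 * xennetback_tx_copy_process() in batches of at most
 * NB_XMIT_PAGES_BATCH requests.
 */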
798 static int
799 xennetback_evthandler(void *arg)
800 {
801 	struct xnetback_instance *xneti = arg;
802 	struct ifnet *ifp = &xneti->xni_if;
803 	netif_tx_request_t txreq;
804 	struct mbuf *m, *m0 = NULL, *mlast = NULL;
805 	int receive_pending;
806 	RING_IDX req_cons, req_prod;
807 	int queued = 0, m0_len = 0;
808 	struct xnetback_xstate *xst;
809 	const bool discard = ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
810 	    (IFF_UP | IFF_RUNNING));
811 
812 	XENPRINTF(("xennetback_evthandler "));
813 	req_cons = xneti->xni_txring.req_cons;
814 again:
815 	req_prod = xneti->xni_txring.sring->req_prod;
816 	xen_rmb();
817 	while (req_cons != req_prod) {
818 		RING_COPY_REQUEST(&xneti->xni_txring, req_cons,
819 		    &txreq);
820 		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
821 		    txreq.size));
822 		req_cons++;
823 		if (__predict_false(discard)) {
824 			/* interface not up, drop all requests */
825 			if_statinc(ifp, if_iqdrops);
826 			xennetback_tx_response(xneti, txreq.id,
827 			    NETIF_RSP_DROPPED);
828 			continue;
829 		}
830 
831 		/*
832 		 * Do some sanity checks, and queue a copy of the data.
833 		 */
834 		const char *msg = xennetback_tx_check_packet(&txreq);
835 		if (__predict_false(msg != NULL)) {
836 			printf("%s: packet with size %d is %s\n",
837 			    ifp->if_xname, txreq.size, msg);
838 			xennetback_tx_response(xneti, txreq.id,
839 			    NETIF_RSP_ERROR);
840 			if_statinc(ifp, if_ierrors);
841 			continue;
842 		}
843 
844 		/* get a mbuf for this fragment */
845 		MGETHDR(m, M_DONTWAIT, MT_DATA);
846 		if (__predict_false(m == NULL)) {
847 			static struct timeval lasttime;
848 mbuf_fail:
849 			if (ratecheck(&lasttime, &xni_pool_errintvl))
850 				printf("%s: mbuf alloc failed\n",
851 				    ifp->if_xname);
852 			xennetback_tx_copy_abort(ifp, xneti, queued);
853 			queued = 0;
854 			m0 = NULL;
855 			xennetback_tx_response(xneti, txreq.id,
856 			    NETIF_RSP_DROPPED);
857 			if_statinc(ifp, if_ierrors);
858 			continue;
859 		}
860 		m->m_len = m->m_pkthdr.len = txreq.size;
861 
862 		if (!m0 && (txreq.flags & NETTXF_more_data)) {
863 			/*
864 			 * The first fragment of a multi-fragment Tx request
865 			 * contains the total packet size. Read the whole
866 			 * chain to determine the actual size of the first
867 			 * (i.e. current) fragment.
868 			 */
869 			int cnt;
870 			m0_len = xennetback_tx_m0len_fragment(xneti,
871 			    txreq.size, req_cons, &cnt);
872 			m->m_len = m0_len;
873 			KASSERT(cnt <= XEN_NETIF_NR_SLOTS_MIN);
874 
875 			if (queued + cnt >= NB_XMIT_PAGES_BATCH) {
876 				/*
877 				 * Flush queue if too full to fit this
878 				 * new packet whole.
879 				 */
880 				KASSERT(m0 == NULL);
881 				xennetback_tx_copy_process(ifp, xneti, queued);
882 				queued = 0;
883 			}
884 		}
885 
886 		if (m->m_len > MHLEN) {
887 			MCLGET(m, M_DONTWAIT);
888 			if (__predict_false((m->m_flags & M_EXT) == 0)) {
889 				m_freem(m);
890 				goto mbuf_fail;
891 			}
892 			if (__predict_false(m->m_len > MCLBYTES)) {
893 				/* one more mbuf necessary */
894 				struct mbuf *mn;
895 				MGET(mn, M_DONTWAIT, MT_DATA);
896 				if (__predict_false(mn == NULL)) {
897 					m_freem(m);
898 					goto mbuf_fail;
899 				}
900 				if (m->m_len - MCLBYTES > MLEN) {
901 					MCLGET(mn, M_DONTWAIT);
902 					if ((mn->m_flags & M_EXT) == 0) {
903 						m_freem(mn);
904 						m_freem(m);
905 						goto mbuf_fail;
906 					}
907 				}
908 				mn->m_len = m->m_len - MCLBYTES;
909 				m->m_len = MCLBYTES;
910 				m->m_next = mn;
911 				KASSERT(mn->m_len <= MCLBYTES);
912 			}
913 			KASSERT(m->m_len <= MCLBYTES);
914 		}
915 
916 		if (m0 || (txreq.flags & NETTXF_more_data)) {
917 			if (m0 == NULL) {
918 				m0 = m;
919 				mlast = (m->m_next) ? m->m_next : m;
920 				KASSERT(mlast->m_next == NULL);
921 			} else {
922 				/* Coalesce like m_cat(), but without copy */
923 				KASSERT(mlast != NULL);
924 				if (M_TRAILINGSPACE(mlast) >= m->m_pkthdr.len) {
925 					mlast->m_len +=  m->m_pkthdr.len;
926 					m_freem(m);
927 				} else {
928 					mlast->m_next = m;
929 					mlast = (m->m_next) ? m->m_next : m;
930 					KASSERT(mlast->m_next == NULL);
931 				}
932 			}
933 		}
934 
935 		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
936 		    xneti->xni_if.if_xname, txreq.offset,
937 		    txreq.size, txreq.id, req_cons & (RING_SIZE(&xneti->xni_txring) - 1)));
938 
939 		xst = &xneti->xni_xstate[queued];
940 		xst->xs_m = (m0 == NULL || m == m0) ? m : NULL;
941 		xst->xs_tx = txreq;
942 		/* Fill the length of _this_ fragment */
943 		xst->xs_tx_size = (m == m0) ? m0_len : m->m_pkthdr.len;
944 		queued++;
945 
946 		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
947 		if (__predict_false(m0 &&
948 		    (txreq.flags & NETTXF_more_data) == 0)) {
949 			/* Last fragment, stop appending mbufs */
950 			m0 = NULL;
951 		}
952 		if (queued == NB_XMIT_PAGES_BATCH) {
953 			KASSERT(m0 == NULL);
954 			xennetback_tx_copy_process(ifp, xneti, queued);
955 			queued = 0;
956 		}
957 	}
958 	xen_wmb();
959 	RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring, receive_pending);
960 	if (receive_pending)
961 		goto again;
962 	xneti->xni_txring.req_cons = req_cons;
963 
964 	if (m0) {
965 		/* Queue empty, and still unfinished multi-fragment request */
966 		printf("%s: dropped unfinished multi-fragment\n",
967 		    ifp->if_xname);
968 		xennetback_tx_copy_abort(ifp, xneti, queued);
969 		queued = 0;
970 		m0 = NULL;
971 	}
972 	if (queued > 0)
973 		xennetback_tx_copy_process(ifp, xneti, queued);
974 
975 	/* check to see if we can transmit more packets */
976 	if_schedule_deferred_start(ifp);
977 
978 	return 1;
979 }
980 
981 static int
982 xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
983 {
984 	//struct xnetback_instance *xneti = ifp->if_softc;
985 	//struct ifreq *ifr = (struct ifreq *)data;
986 	int s, error;
987 
988 	s = splnet();
989 	error = ether_ioctl(ifp, cmd, data);
990 	if (error == ENETRESET)
991 		error = 0;
992 	splx(s);
993 	return error;
994 }
995 
996 static void
997 xennetback_ifstart(struct ifnet *ifp)
998 {
999 	struct xnetback_instance *xneti = ifp->if_softc;
1000 
1001 	/*
1002 	 * The Xen communication channel is much more efficient if we can
1003 	 * schedule a batch of packets for the domain. Deferred start by the
1004 	 * network stack will enqueue all pending mbufs in the interface's
1005 	 * send queue before they are processed by the soft interrupt handler.
1006 	 */
1007 	xennetback_ifsoftstart_copy(xneti);
1008 }
1009 
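/*
 * Flush a batch of copies toward the frontend's RX ring: perform the
 * grant copies, advance the RX ring pointers, notify the frontend if
 * needed, then unload the DMA maps and free the transmitted mbufs.
 */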
1010 static void
1011 xennetback_rx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
1012 	int queued, int copycnt)
1013 {
1014 	int notify;
1015 	struct xnetback_xstate *xst;
1016 
1017 	if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Rx") != 0) {
1018 		/* message already displayed */
1019 		goto free_mbufs;
1020 	}
1021 
1022 	/* update ring pointers */
1023 	xneti->xni_rxring.req_cons += queued;
1024 	xneti->xni_rxring.rsp_prod_pvt += queued;
1025 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_rxring, notify);
1026 
1027 	/* send event */
1028 	if (notify) {
1029 		xen_wmb();
1030 		XENPRINTF(("%s receive event\n",
1031 		    xneti->xni_if.if_xname));
1032 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
1033 	}
1034 
1035 free_mbufs:
1036 	/* now that data was copied we can free the mbufs */
1037 	for (int j = 0; j < queued; j++) {
1038 		xst = &xneti->xni_xstate[j];
1039 		if (xst->xs_loaded) {
1040 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
1041 			    xst->xs_dmamap);
1042 			xst->xs_loaded = false;
1043 		}
1044 		if (xst->xs_m != NULL) {
1045 			m_freem(xst->xs_m);
1046 			xst->xs_m = NULL;
1047 		}
1048 	}
1049 }
1050 
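/*
 * Queue grant-copy operations for one outgoing mbuf chain (already
 * loaded into xst0's DMA map), copying it into the frontend's receive
 * buffers.  Additional RX requests are consumed and responses filled
 * in whenever the packet spills over a page boundary.
 */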
1051 static void
1052 xennetback_rx_copy_queue(struct xnetback_instance *xneti,
1053     struct xnetback_xstate *xst0, int rsp_prod_pvt, int *queued, int *copycntp)
1054 {
1055 	struct xnetback_xstate *xst = xst0;
1056 	gnttab_copy_t *gop;
1057 	struct netif_rx_request rxreq;
1058 	netif_rx_response_t *rxresp;
1059 	paddr_t ma;
1060 	size_t goff, segoff, segsize, take, totsize;
1061 	int copycnt = *copycntp, reqcnt = *queued;
1062 	const bus_dmamap_t dm = xst0->xs_dmamap;
1063 	const bool multiseg = (dm->dm_nsegs > 1);
1064 
1065 	KASSERT(xst0 == &xneti->xni_xstate[reqcnt]);
1066 
1067 	RING_COPY_REQUEST(&xneti->xni_rxring,
1068 	    xneti->xni_rxring.req_cons + reqcnt, &rxreq);
1069 	goff = 0;
1070 	rxresp = RING_GET_RESPONSE(&xneti->xni_rxring, rsp_prod_pvt + reqcnt);
1071 	reqcnt++;
1072 
1073 	rxresp->id = rxreq.id;
1074 	rxresp->offset = 0;
1075 	if ((xst0->xs_m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) != 0) {
1076 		rxresp->flags = NETRXF_csum_blank;
1077 	} else {
1078 		rxresp->flags = NETRXF_data_validated;
1079 	}
1080 	if (multiseg)
1081 		rxresp->flags |= NETRXF_more_data;
1082 
1083 	totsize = xst0->xs_m->m_pkthdr.len;
1084 
1085 	/*
1086 	 * Arrange for the mbuf contents to be copied into one or more
1087 	 * provided memory pages.
1088 	 */
1089 	for (int seg = 0; seg < dm->dm_nsegs; seg++) {
1090 		ma = dm->dm_segs[seg].ds_addr;
1091 		segsize = dm->dm_segs[seg].ds_len;
1092 		segoff = 0;
1093 
1094 		while (segoff < segsize) {
1095 			take = uimin(PAGE_SIZE - goff, segsize - segoff);
1096 			KASSERT(take <= totsize);
1097 
1098 			/* add copy request */
1099 			gop = &xneti->xni_gop_copy[copycnt++];
1100 			gop->flags = GNTCOPY_dest_gref;
1101 			gop->source.offset = (ma & PAGE_MASK) + segoff;
1102 			gop->source.domid = DOMID_SELF;
1103 			gop->source.u.gmfn = ma >> PAGE_SHIFT;
1104 
1105 			gop->dest.u.ref = rxreq.gref;
1106 			gop->dest.offset = goff;
1107 			gop->dest.domid = xneti->xni_domid;
1108 
1109 			gop->len = take;
1110 
1111 			segoff += take;
1112 			goff += take;
1113 			totsize -= take;
1114 
1115 			if (goff == PAGE_SIZE && totsize > 0) {
1116 				rxresp->status = goff;
1117 
1118 				/* Take next grant */
1119 				RING_COPY_REQUEST(&xneti->xni_rxring,
1120 				    xneti->xni_rxring.req_cons + reqcnt,
1121 				    &rxreq);
1122 				goff = 0;
1123 				rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
1124 				    rsp_prod_pvt + reqcnt);
1125 				reqcnt++;
1126 
1127 				rxresp->id = rxreq.id;
1128 				rxresp->offset = 0;
1129 				rxresp->flags = NETRXF_more_data;
1130 
1131 				xst++;
1132 				xst->xs_m = NULL;
1133 			}
1134 		}
1135 	}
1136 	rxresp->flags &= ~NETRXF_more_data;
1137 	rxresp->status = goff;
1138 	KASSERT(totsize == 0);
1139 
1140 	KASSERT(copycnt > *copycntp);
1141 	KASSERT(reqcnt > *queued);
1142 	*copycntp = copycnt;
1143 	*queued = reqcnt;
1144 }
1145 
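/*
 * Transmit path toward the guest: dequeue packets from the interface
 * send queue, defragment and DMA-load them as needed, queue grant
 * copies into the frontend's RX ring and flush them in batches.  When
 * the ring is full, arm the watchdog timer so transmission resumes
 * later.
 */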
1146 static void
1147 xennetback_ifsoftstart_copy(struct xnetback_instance *xneti)
1148 {
1149 	struct ifnet *ifp = &xneti->xni_if;
1150 	struct mbuf *m;
1151 	int queued = 0;
1152 	RING_IDX req_prod, rsp_prod_pvt;
1153 	struct xnetback_xstate *xst;
1154 	int copycnt = 0;
1155 	bool abort;
1156 
1157 	XENPRINTF(("xennetback_ifsoftstart_copy "));
1158 	int s = splnet();
1159 	if (__predict_false((ifp->if_flags & IFF_RUNNING) == 0)) {
1160 		splx(s);
1161 		return;
1162 	}
1163 
1164 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
1165 		XENPRINTF(("pkt\n"));
1166 		req_prod = xneti->xni_rxring.sring->req_prod;
1167 		rsp_prod_pvt = xneti->xni_rxring.rsp_prod_pvt;
1168 		xen_rmb();
1169 
1170 		abort = false;
1171 		KASSERT(queued == 0);
1172 		KASSERT(copycnt == 0);
1173 		while (copycnt < NB_XMIT_PAGES_BATCH) {
1174 #define XN_RING_FULL(cnt)	\
1175 			req_prod == xneti->xni_rxring.req_cons + (cnt) ||  \
1176 			xneti->xni_rxring.req_cons - (rsp_prod_pvt + cnt) ==  \
1177 			NET_RX_RING_SIZE
1178 
1179 			if (__predict_false(XN_RING_FULL(1))) {
1180 				/* out of ring space */
1181 				XENPRINTF(("xennetback_ifstart: ring full "
1182 				    "req_prod 0x%x req_cons 0x%x rsp_prod_pvt "
1183 				    "0x%x\n",
1184 				    req_prod,
1185 				    xneti->xni_rxring.req_cons + queued,
1186 				    rsp_prod_pvt + queued));
1187 				abort = true;
1188 				break;
1189 			}
1190 
1191 			IFQ_DEQUEUE(&ifp->if_snd, m);
1192 			if (m == NULL)
1193 				break;
1194 
1195 again:
1196 			xst = &xneti->xni_xstate[queued];
1197 
1198 			/*
1199 			 * For short packets it's always way faster to pass a
1200 			 * single defragmented packet, even with feature-sg.
1201 			 * Try to defragment first if the result is likely
1202 			 * to fit into a single mbuf.
1203 			 */
1204 			if (m->m_pkthdr.len < MCLBYTES && m->m_next)
1205 				(void)m_defrag(m, M_DONTWAIT);
1206 
1207 			if (bus_dmamap_load_mbuf(
1208 			    xneti->xni_xbusd->xbusd_dmat,
1209 			    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0) {
1210 				if (m_defrag(m, M_DONTWAIT) == NULL) {
1211 					m_freem(m);
1212 					static struct timeval lasttime;
1213 					if (ratecheck(&lasttime, &xni_pool_errintvl))
1214 						printf("%s: fail defrag mbuf\n",
1215 						    ifp->if_xname);
1216 					continue;
1217 				}
1218 
1219 				if (__predict_false(bus_dmamap_load_mbuf(
1220 				    xneti->xni_xbusd->xbusd_dmat,
1221 				    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
1222 					printf("%s: cannot load mbuf\n",
1223 					    ifp->if_xname);
1224 					m_freem(m);
1225 					continue;
1226 				}
1227 			}
1228 			KASSERT(xst->xs_dmamap->dm_nsegs < NB_XMIT_PAGES_BATCH);
1229 			KASSERTMSG(queued <= copycnt, "queued %d > copycnt %d",
1230 			    queued, copycnt);
1231 
1232 			if (__predict_false(XN_RING_FULL(
1233 			    xst->xs_dmamap->dm_nsegs))) {
1234 				/* Ring too full to fit the packet */
1235 				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
1236 				    xst->xs_dmamap);
1237 				m_freem(m);
1238 				abort = true;
1239 				break;
1240 			}
1241 			if (__predict_false(copycnt + xst->xs_dmamap->dm_nsegs >
1242 			    NB_XMIT_PAGES_BATCH)) {
1243 				/* Batch already too full, flush and retry */
1244 				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
1245 				    xst->xs_dmamap);
1246 				xennetback_rx_copy_process(ifp, xneti, queued,
1247 				    copycnt);
1248 				queued = copycnt = 0;
1249 				goto again;
1250 			}
1251 
1252 			/* Now committed to send */
1253 			xst->xs_loaded = true;
1254 			xst->xs_m = m;
1255 			xennetback_rx_copy_queue(xneti, xst,
1256 			    rsp_prod_pvt, &queued, &copycnt);
1257 
1258 			if_statinc(ifp, if_opackets);
1259 			bpf_mtap(ifp, m, BPF_D_OUT);
1260 		}
1261 		KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
1262 		KASSERT(queued <= copycnt);
1263 		if (copycnt > 0) {
1264 			xennetback_rx_copy_process(ifp, xneti, queued, copycnt);
1265 			queued = copycnt = 0;
1266 		}
1267 		/*
1268 		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
1269 		 * here, as the frontend doesn't notify when adding
1270 		 * requests anyway
1271 		 */
1272 		if (__predict_false(abort ||
1273 		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
1274 			/* ring full */
1275 			ifp->if_timer = 1;
1276 			break;
1277 		}
1278 	}
1279 	splx(s);
1280 }
1281 
1282 static void
1283 xennetback_ifwatchdog(struct ifnet *ifp)
1284 {
1285 	/*
1286 	 * We can get into the following condition: transmit stalls because
1287 	 * the ring is full while the ifq is full too.
1288 	 *
1289 	 * In this case (as, unfortunately, we don't get an interrupt from xen
1290 	 * on transmit) nothing will ever call xennetback_ifstart() again.
1291 	 * Here we abuse the watchdog to get out of this condition.
1292 	 */
1293 	XENPRINTF(("xennetback_ifwatchdog\n"));
1294 	xennetback_ifstart(ifp);
1295 }
1296 
1297 static int
1298 xennetback_ifinit(struct ifnet *ifp)
1299 {
1300 	struct xnetback_instance *xneti = ifp->if_softc;
1301 	int s = splnet();
1302 
1303 	if ((ifp->if_flags & IFF_UP) == 0) {
1304 		splx(s);
1305 		return 0;
1306 	}
1307 	if (xneti->xni_status == CONNECTED)
1308 		ifp->if_flags |= IFF_RUNNING;
1309 	splx(s);
1310 	return 0;
1311 }
1312 
1313 static void
1314 xennetback_ifstop(struct ifnet *ifp, int disable)
1315 {
1316 	struct xnetback_instance *xneti = ifp->if_softc;
1317 	int s = splnet();
1318 
1319 	ifp->if_flags &= ~IFF_RUNNING;
1320 	ifp->if_timer = 0;
1321 	if (xneti->xni_status == CONNECTED) {
1322 		xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
1323 	}
1324 	splx(s);
1325 }
1326