xref: /openbsd/sys/dev/pv/if_vio.c (revision 5a38ef86)
1 /*	$OpenBSD: if_vio.c,v 1.20 2021/11/05 11:38:29 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
5  * Copyright (c) 2010 Minoura Makoto.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "bpfilter.h"
30 #include "vlan.h"
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/device.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <sys/timeout.h>
40 
41 #include <dev/pv/virtioreg.h>
42 #include <dev/pv/virtiovar.h>
43 
44 #include <net/if.h>
45 #include <net/if_media.h>
46 
47 #include <netinet/in.h>
48 #include <netinet/if_ether.h>
49 #include <netinet/ip.h>
50 #include <netinet/tcp.h>
51 #include <netinet/udp.h>
52 
53 #if NBPFILTER > 0
54 #include <net/bpf.h>
55 #endif
56 
57 #if VIRTIO_DEBUG
58 #define DPRINTF(x...) printf(x)
59 #else
60 #define DPRINTF(x...)
61 #endif
62 
63 /*
64  * if_vioreg.h:
65  */
66 /* Configuration registers */
67 #define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
68 #define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
69 
70 /* Feature bits */
71 #define VIRTIO_NET_F_CSUM			(1ULL<<0)
72 #define VIRTIO_NET_F_GUEST_CSUM			(1ULL<<1)
73 #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS        (1ULL<<2)
74 #define VIRTIO_NET_F_MTU                        (1ULL<<3)
75 #define VIRTIO_NET_F_MAC			(1ULL<<5)
76 #define VIRTIO_NET_F_GSO			(1ULL<<6)
77 #define VIRTIO_NET_F_GUEST_TSO4			(1ULL<<7)
78 #define VIRTIO_NET_F_GUEST_TSO6			(1ULL<<8)
79 #define VIRTIO_NET_F_GUEST_ECN			(1ULL<<9)
80 #define VIRTIO_NET_F_GUEST_UFO			(1ULL<<10)
81 #define VIRTIO_NET_F_HOST_TSO4			(1ULL<<11)
82 #define VIRTIO_NET_F_HOST_TSO6			(1ULL<<12)
83 #define VIRTIO_NET_F_HOST_ECN			(1ULL<<13)
84 #define VIRTIO_NET_F_HOST_UFO			(1ULL<<14)
85 #define VIRTIO_NET_F_MRG_RXBUF			(1ULL<<15)
86 #define VIRTIO_NET_F_STATUS			(1ULL<<16)
87 #define VIRTIO_NET_F_CTRL_VQ			(1ULL<<17)
88 #define VIRTIO_NET_F_CTRL_RX			(1ULL<<18)
89 #define VIRTIO_NET_F_CTRL_VLAN			(1ULL<<19)
90 #define VIRTIO_NET_F_CTRL_RX_EXTRA		(1ULL<<20)
91 #define VIRTIO_NET_F_GUEST_ANNOUNCE		(1ULL<<21)
92 #define VIRTIO_NET_F_MQ				(1ULL<<22)
93 #define VIRTIO_NET_F_CTRL_MAC_ADDR		(1ULL<<23)
94 
95 /*
96  * Config(8) flags. The lowest byte is reserved for generic virtio stuff.
97  */
98 
99 /* Workaround for vlan related bug in qemu < version 2.0 */
100 #define CONFFLAG_QEMU_VLAN_BUG		(1<<8)
101 
102 static const struct virtio_feature_name virtio_net_feature_names[] = {
103 #if VIRTIO_DEBUG
104 	{ VIRTIO_NET_F_CSUM,			"CSum" },
105 	{ VIRTIO_NET_F_GUEST_CSUM,		"GuestCSum" },
106 	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlGuestOffl" },
107 	{ VIRTIO_NET_F_MTU,			"MTU", },
108 	{ VIRTIO_NET_F_MAC,			"MAC" },
109 	{ VIRTIO_NET_F_GSO,			"GSO" },
110 	{ VIRTIO_NET_F_GUEST_TSO4,		"GuestTSO4" },
111 	{ VIRTIO_NET_F_GUEST_TSO6,		"GuestTSO6" },
112 	{ VIRTIO_NET_F_GUEST_ECN,		"GuestECN" },
113 	{ VIRTIO_NET_F_GUEST_UFO,		"GuestUFO" },
114 	{ VIRTIO_NET_F_HOST_TSO4,		"HostTSO4" },
115 	{ VIRTIO_NET_F_HOST_TSO6,		"HostTSO6" },
116 	{ VIRTIO_NET_F_HOST_ECN,		"HostECN" },
117 	{ VIRTIO_NET_F_HOST_UFO,		"HostUFO" },
118 	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRXBuf" },
119 	{ VIRTIO_NET_F_STATUS,			"Status" },
120 	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVQ" },
121 	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRX" },
122 	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLAN" },
123 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRXExtra" },
124 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce" },
125 	{ VIRTIO_NET_F_MQ,			"MQ" },
126 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMAC" },
127 #endif
128 	{ 0,				NULL }
129 };
130 
131 /* Status */
132 #define VIRTIO_NET_S_LINK_UP	1
133 
/*
 * Packet header structure.
 *
 * Packed header that accompanies every frame: the tx path builds one per
 * slot in sc_tx_hdrs[], the rx path finds one at the start of the first
 * buffer of a received frame.  The driver uses either the whole structure
 * or only the part in front of num_buffers (see sc_hdr_size).
 */
struct virtio_net_hdr {
	uint8_t		flags;		/* VIRTIO_NET_HDR_F_* */
	uint8_t		gso_type;	/* VIRTIO_NET_HDR_GSO_* */
	uint16_t	hdr_len;
	uint16_t	gso_size;
	uint16_t	csum_start;	/* frame offset of the L4 header */
	uint16_t	csum_offset;	/* checksum field offset in L4 hdr */

	/* only present if VIRTIO_NET_F_MRG_RXBUF is negotiated */
	uint16_t	num_buffers;	/* rx: buffers making up the frame */
} __packed;
146 
147 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
148 #define VIRTIO_NET_HDR_GSO_NONE		0 /* gso_type */
149 #define VIRTIO_NET_HDR_GSO_TCPV4	1 /* gso_type */
150 #define VIRTIO_NET_HDR_GSO_UDP		3 /* gso_type */
151 #define VIRTIO_NET_HDR_GSO_TCPV6	4 /* gso_type */
152 #define VIRTIO_NET_HDR_GSO_ECN		0x80 /* gso_type, |'ed */
153 
154 #define VIRTIO_NET_MAX_GSO_LEN		(65536+ETHER_HDR_LEN)
155 
156 /* Control virtqueue */
157 struct virtio_net_ctrl_cmd {
158 	uint8_t	class;
159 	uint8_t	command;
160 } __packed;
161 #define VIRTIO_NET_CTRL_RX		0
162 # define VIRTIO_NET_CTRL_RX_PROMISC	0
163 # define VIRTIO_NET_CTRL_RX_ALLMULTI	1
164 
165 #define VIRTIO_NET_CTRL_MAC		1
166 # define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
167 
168 #define VIRTIO_NET_CTRL_VLAN		2
169 # define VIRTIO_NET_CTRL_VLAN_ADD	0
170 # define VIRTIO_NET_CTRL_VLAN_DEL	1
171 
172 struct virtio_net_ctrl_status {
173 	uint8_t	ack;
174 } __packed;
175 #define VIRTIO_NET_OK			0
176 #define VIRTIO_NET_ERR			1
177 
178 struct virtio_net_ctrl_rx {
179 	uint8_t	onoff;
180 } __packed;
181 
182 struct virtio_net_ctrl_mac_tbl {
183 	uint32_t nentries;
184 	uint8_t macs[][ETHER_ADDR_LEN];
185 } __packed;
186 
187 struct virtio_net_ctrl_vlan {
188 	uint16_t id;
189 } __packed;
190 
191 /*
192  * if_viovar.h:
193  */
194 enum vio_ctrl_state {
195 	FREE, INUSE, DONE, RESET
196 };
197 
198 struct vio_softc {
199 	struct device		sc_dev;
200 
201 	struct virtio_softc	*sc_virtio;
202 #define	VQRX	0
203 #define	VQTX	1
204 #define	VQCTL	2
205 	struct virtqueue	sc_vq[3];
206 
207 	struct arpcom		sc_ac;
208 	struct ifmedia		sc_media;
209 
210 	short			sc_ifflags;
211 
212 	/* bus_dmamem */
213 	bus_dma_segment_t	sc_dma_seg;
214 	bus_dmamap_t		sc_dma_map;
215 	size_t			sc_dma_size;
216 	caddr_t			sc_dma_kva;
217 
218 	int			sc_hdr_size;
219 	struct virtio_net_hdr	*sc_tx_hdrs;
220 	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
221 	struct virtio_net_ctrl_status *sc_ctrl_status;
222 	struct virtio_net_ctrl_rx *sc_ctrl_rx;
223 	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
224 #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
225 	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
226 
227 	/* kmem */
228 	bus_dmamap_t		*sc_arrays;
229 #define sc_rx_dmamaps sc_arrays
230 	bus_dmamap_t		*sc_tx_dmamaps;
231 	struct mbuf		**sc_rx_mbufs;
232 	struct mbuf		**sc_tx_mbufs;
233 	struct if_rxring	sc_rx_ring;
234 
235 	enum vio_ctrl_state	sc_ctrl_inuse;
236 
237 	struct timeout		sc_txtick, sc_rxtick;
238 };
239 
/* Byte offset of pointer p inside the driver's shared DMA region. */
#define VIO_DMAMEM_OFFSET(sc, p) ((caddr_t)(p) - (sc)->sc_dma_kva)

/* bus_dmamap_sync() for the `size' bytes at p inside the shared DMA region. */
#define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags)		\
	bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map,	\
	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))

/* Enqueue the `size' bytes at p (inside the shared DMA region) on a slot. */
#define VIO_DMAMEM_ENQUEUE(sc, vq, slot, p, size, write)	\
	virtio_enqueue_p((vq), (slot), (sc)->sc_dma_map,	\
	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (write))

/* True when the full header (including num_buffers) is in use. */
#define VIO_HAVE_MRG_RXBUF(sc)					\
	((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))

#define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
#define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
#define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */

/* Space for both MAC filter tables (two table headers plus all entries). */
#define VIO_CTRL_MAC_INFO_SIZE					\
	(2*sizeof(struct virtio_net_ctrl_mac_tbl) +		\
	 (VIRTIO_NET_CTRL_MAC_MC_ENTRIES +			\
	  VIRTIO_NET_CTRL_MAC_UC_ENTRIES) * ETHER_ADDR_LEN)
258 
259 /* cfattach interface functions */
260 int	vio_match(struct device *, void *, void *);
261 void	vio_attach(struct device *, struct device *, void *);
262 
263 /* ifnet interface functions */
264 int	vio_init(struct ifnet *);
265 void	vio_stop(struct ifnet *, int);
266 void	vio_start(struct ifnet *);
267 int	vio_ioctl(struct ifnet *, u_long, caddr_t);
268 void	vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc);
269 void	vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc);
270 
271 /* rx */
272 int	vio_add_rx_mbuf(struct vio_softc *, int);
273 void	vio_free_rx_mbuf(struct vio_softc *, int);
274 void	vio_populate_rx_mbufs(struct vio_softc *);
275 int	vio_rxeof(struct vio_softc *);
276 int	vio_rx_intr(struct virtqueue *);
277 void	vio_rx_drain(struct vio_softc *);
278 void	vio_rxtick(void *);
279 
280 /* tx */
281 int	vio_tx_intr(struct virtqueue *);
282 int	vio_txeof(struct virtqueue *);
283 void	vio_tx_drain(struct vio_softc *);
284 int	vio_encap(struct vio_softc *, int, struct mbuf *);
285 void	vio_txtick(void *);
286 
287 /* other control */
288 void	vio_link_state(struct ifnet *);
289 int	vio_config_change(struct virtio_softc *);
290 int	vio_ctrl_rx(struct vio_softc *, int, int);
291 int	vio_set_rx_filter(struct vio_softc *);
292 void	vio_iff(struct vio_softc *);
293 int	vio_media_change(struct ifnet *);
294 void	vio_media_status(struct ifnet *, struct ifmediareq *);
295 int	vio_ctrleof(struct virtqueue *);
296 int	vio_wait_ctrl(struct vio_softc *sc);
297 int	vio_wait_ctrl_done(struct vio_softc *sc);
298 void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
299 int	vio_alloc_mem(struct vio_softc *);
300 int	vio_alloc_dmamem(struct vio_softc *);
301 void	vio_free_dmamem(struct vio_softc *);
302 
303 #if VIRTIO_DEBUG
304 void	vio_dump(struct vio_softc *);
305 #endif
306 
307 int
308 vio_match(struct device *parent, void *match, void *aux)
309 {
310 	struct virtio_softc *va = aux;
311 
312 	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
313 		return 1;
314 
315 	return 0;
316 }
317 
318 const struct cfattach vio_ca = {
319 	sizeof(struct vio_softc), vio_match, vio_attach, NULL
320 };
321 
322 struct cfdriver vio_cd = {
323 	NULL, "vio", DV_IFNET
324 };
325 
326 int
327 vio_alloc_dmamem(struct vio_softc *sc)
328 {
329 	struct virtio_softc *vsc = sc->sc_virtio;
330 	int nsegs;
331 
332 	if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
333 	    sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
334 	    &sc->sc_dma_map) != 0)
335 		goto err;
336 	if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
337 	    &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
338 		goto destroy;
339 	if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
340 	    sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
341 		goto free;
342 	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_dma_map, sc->sc_dma_kva,
343 	    sc->sc_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
344 		goto unmap;
345 	return (0);
346 
347 unmap:
348 	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
349 free:
350 	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
351 destroy:
352 	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
353 err:
354 	return (1);
355 }
356 
357 void
358 vio_free_dmamem(struct vio_softc *sc)
359 {
360 	struct virtio_softc *vsc = sc->sc_virtio;
361 	bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map);
362 	bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
363 	bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
364 	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
365 }
366 
367 /* allocate memory */
368 /*
369  * dma memory is used for:
370  *   sc_tx_hdrs[slot]:	 metadata array for frames to be sent (WRITE)
371  *   sc_ctrl_cmd:	 command to be sent via ctrl vq (WRITE)
372  *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
373  *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
374  *			 (WRITE)
375  *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
376  *			 class command (WRITE)
377  *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
378  *			 class command (WRITE)
379  * sc_ctrl_* structures are allocated only one each; they are protected by
380  * sc_ctrl_inuse, which must only be accessed at splnet
381  *
382  * metadata headers for received frames are stored at the start of the
383  * rx mbufs.
384  */
385 /*
386  * dynamically allocated memory is used for:
387  *   sc_rx_dmamaps[slot]:	bus_dmamap_t array for received payload
388  *   sc_tx_dmamaps[slot]:	bus_dmamap_t array for sent payload
389  *   sc_rx_mbufs[slot]:		mbuf pointer array for received frames
390  *   sc_tx_mbufs[slot]:		mbuf pointer array for sent frames
391  */
392 int
393 vio_alloc_mem(struct vio_softc *sc)
394 {
395 	struct virtio_softc *vsc = sc->sc_virtio;
396 	struct ifnet *ifp = &sc->sc_ac.ac_if;
397 	int allocsize, r, i, txsize;
398 	unsigned int offset = 0;
399 	int rxqsize, txqsize;
400 	caddr_t kva;
401 
402 	rxqsize = vsc->sc_vqs[0].vq_num;
403 	txqsize = vsc->sc_vqs[1].vq_num;
404 
405 	/*
406 	 * For simplicity, we always allocate the full virtio_net_hdr size
407 	 * even if VIRTIO_NET_F_MRG_RXBUF is not negotiated and
408 	 * only a part of the memory is ever used.
409 	 */
410 	allocsize = sizeof(struct virtio_net_hdr) * txqsize;
411 
412 	if (vsc->sc_nvqs == 3) {
413 		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
414 		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
415 		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
416 		allocsize += VIO_CTRL_MAC_INFO_SIZE;
417 	}
418 	sc->sc_dma_size = allocsize;
419 
420 	if (vio_alloc_dmamem(sc) != 0) {
421 		printf("unable to allocate dma region\n");
422 		return  -1;
423 	}
424 
425 	kva = sc->sc_dma_kva;
426 	sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset);
427 	offset += sizeof(struct virtio_net_hdr) * txqsize;
428 	if (vsc->sc_nvqs == 3) {
429 		sc->sc_ctrl_cmd = (void*)(kva + offset);
430 		offset += sizeof(*sc->sc_ctrl_cmd);
431 		sc->sc_ctrl_status = (void*)(kva + offset);
432 		offset += sizeof(*sc->sc_ctrl_status);
433 		sc->sc_ctrl_rx = (void*)(kva + offset);
434 		offset += sizeof(*sc->sc_ctrl_rx);
435 		sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
436 		offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
437 		    ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
438 		sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset);
439 	}
440 
441 	sc->sc_arrays = mallocarray(rxqsize + txqsize,
442 	    2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *), M_DEVBUF,
443 	    M_WAITOK | M_CANFAIL | M_ZERO);
444 	if (sc->sc_arrays == NULL) {
445 		printf("unable to allocate mem for dmamaps\n");
446 		goto err_hdr;
447 	}
448 	allocsize = (rxqsize + txqsize) *
449 	    (2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *));
450 
451 	sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize;
452 	sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
453 	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
454 
455 	for (i = 0; i < rxqsize; i++) {
456 		r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
457 		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
458 		if (r != 0)
459 			goto err_reqs;
460 	}
461 
462 	txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
463 	for (i = 0; i < txqsize; i++) {
464 		r = bus_dmamap_create(vsc->sc_dmat, txsize,
465 		    VIRTIO_NET_TX_MAXNSEGS, txsize, 0,
466 		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
467 		    &sc->sc_tx_dmamaps[i]);
468 		if (r != 0)
469 			goto err_reqs;
470 	}
471 
472 	return 0;
473 
474 err_reqs:
475 	printf("dmamap creation failed, error %d\n", r);
476 	for (i = 0; i < txqsize; i++) {
477 		if (sc->sc_tx_dmamaps[i])
478 			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
479 	}
480 	for (i = 0; i < rxqsize; i++) {
481 		if (sc->sc_rx_dmamaps[i])
482 			bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]);
483 	}
484 	if (sc->sc_arrays) {
485 		free(sc->sc_arrays, M_DEVBUF, allocsize);
486 		sc->sc_arrays = 0;
487 	}
488 err_hdr:
489 	vio_free_dmamem(sc);
490 	return -1;
491 }
492 
493 void
494 vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc)
495 {
496 	int i;
497 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
498 		ac->ac_enaddr[i] = virtio_read_device_config_1(vsc,
499 		    VIRTIO_NET_CONFIG_MAC + i);
500 	}
501 }
502 
503 void
504 vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc)
505 {
506 	int i;
507 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
508 		virtio_write_device_config_1(vsc, VIRTIO_NET_CONFIG_MAC + i,
509 		     ac->ac_enaddr[i]);
510 	}
511 }
512 
513 void
514 vio_attach(struct device *parent, struct device *self, void *aux)
515 {
516 	struct vio_softc *sc = (struct vio_softc *)self;
517 	struct virtio_softc *vsc = (struct virtio_softc *)parent;
518 	int i;
519 	struct ifnet *ifp = &sc->sc_ac.ac_if;
520 
521 	if (vsc->sc_child != NULL) {
522 		printf(": child already attached for %s; something wrong...\n",
523 		    parent->dv_xname);
524 		return;
525 	}
526 
527 	sc->sc_virtio = vsc;
528 
529 	vsc->sc_child = self;
530 	vsc->sc_ipl = IPL_NET;
531 	vsc->sc_vqs = &sc->sc_vq[0];
532 	vsc->sc_config_change = 0;
533 	vsc->sc_driver_features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
534 	    VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX |
535 	    VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
536 	    VIRTIO_F_RING_EVENT_IDX;
537 
538 	virtio_negotiate_features(vsc, virtio_net_feature_names);
539 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
540 		vio_get_lladr(&sc->sc_ac, vsc);
541 	} else {
542 		ether_fakeaddr(ifp);
543 		vio_put_lladr(&sc->sc_ac, vsc);
544 	}
545 	printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
546 
547 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
548 	    vsc->sc_version_1) {
549 		sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
550 	} else {
551 		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
552 	}
553 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
554 		ifp->if_hardmtu = 16000; /* arbitrary limit */
555 	else
556 		ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
557 
558 	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0)
559 		goto err;
560 	vsc->sc_nvqs = 1;
561 	sc->sc_vq[VQRX].vq_done = vio_rx_intr;
562 	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1,
563 	    sc->sc_hdr_size + ifp->if_hardmtu + ETHER_HDR_LEN,
564 	    VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
565 		goto err;
566 	}
567 	vsc->sc_nvqs = 2;
568 	sc->sc_vq[VQTX].vq_done = vio_tx_intr;
569 	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
570 	if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
571 		virtio_postpone_intr_far(&sc->sc_vq[VQTX]);
572 	else
573 		virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
574 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)
575 	    && virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_RX)) {
576 		if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, NBPG, 1,
577 		    "control") == 0) {
578 			sc->sc_vq[VQCTL].vq_done = vio_ctrleof;
579 			virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
580 			vsc->sc_nvqs = 3;
581 		}
582 	}
583 
584 	if (vio_alloc_mem(sc) < 0)
585 		goto err;
586 
587 	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
588 	ifp->if_softc = sc;
589 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
590 	ifp->if_start = vio_start;
591 	ifp->if_ioctl = vio_ioctl;
592 	ifp->if_capabilities = IFCAP_VLAN_MTU;
593 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM))
594 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4;
595 	ifq_set_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
596 	ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
597 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
598 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
599 	vsc->sc_config_change = vio_config_change;
600 	timeout_set(&sc->sc_txtick, vio_txtick, &sc->sc_vq[VQTX]);
601 	timeout_set(&sc->sc_rxtick, vio_rxtick, &sc->sc_vq[VQRX]);
602 
603 	if_attach(ifp);
604 	ether_ifattach(ifp);
605 
606 	return;
607 
608 err:
609 	for (i = 0; i < vsc->sc_nvqs; i++)
610 		virtio_free_vq(vsc, &sc->sc_vq[i]);
611 	vsc->sc_nvqs = 0;
612 	vsc->sc_child = VIRTIO_CHILD_ERROR;
613 	return;
614 }
615 
616 /* check link status */
617 void
618 vio_link_state(struct ifnet *ifp)
619 {
620 	struct vio_softc *sc = ifp->if_softc;
621 	struct virtio_softc *vsc = sc->sc_virtio;
622 	int link_state = LINK_STATE_FULL_DUPLEX;
623 
624 	if (virtio_has_feature(vsc, VIRTIO_NET_F_STATUS)) {
625 		int status = virtio_read_device_config_2(vsc,
626 		    VIRTIO_NET_CONFIG_STATUS);
627 		if (!(status & VIRTIO_NET_S_LINK_UP))
628 			link_state = LINK_STATE_DOWN;
629 	}
630 	if (ifp->if_link_state != link_state) {
631 		ifp->if_link_state = link_state;
632 		if_link_state_change(ifp);
633 	}
634 }
635 
636 int
637 vio_config_change(struct virtio_softc *vsc)
638 {
639 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
640 	vio_link_state(&sc->sc_ac.ac_if);
641 	return 1;
642 }
643 
/*
 * ifmedia change callback.  Media change requests are ignored; the call
 * always reports success (0).
 */
int
vio_media_change(struct ifnet *ifp)
{
	return (0);
}
650 
651 void
652 vio_media_status(struct ifnet *ifp, struct ifmediareq *imr)
653 {
654 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
655 	imr->ifm_status = IFM_AVALID;
656 
657 	vio_link_state(ifp);
658 	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP)
659 		imr->ifm_status |= IFM_ACTIVE|IFM_FDX;
660 }
661 
662 /*
663  * Interface functions for ifnet
664  */
665 int
666 vio_init(struct ifnet *ifp)
667 {
668 	struct vio_softc *sc = ifp->if_softc;
669 
670 	vio_stop(ifp, 0);
671 	if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
672 	    sc->sc_vq[VQRX].vq_num);
673 	vio_populate_rx_mbufs(sc);
674 	ifp->if_flags |= IFF_RUNNING;
675 	ifq_clr_oactive(&ifp->if_snd);
676 	vio_iff(sc);
677 	vio_link_state(ifp);
678 	return 0;
679 }
680 
681 void
682 vio_stop(struct ifnet *ifp, int disable)
683 {
684 	struct vio_softc *sc = ifp->if_softc;
685 	struct virtio_softc *vsc = sc->sc_virtio;
686 
687 	timeout_del(&sc->sc_txtick);
688 	timeout_del(&sc->sc_rxtick);
689 	ifp->if_flags &= ~IFF_RUNNING;
690 	ifq_clr_oactive(&ifp->if_snd);
691 	/* only way to stop I/O and DMA is resetting... */
692 	virtio_reset(vsc);
693 	vio_rxeof(sc);
694 	if (vsc->sc_nvqs >= 3)
695 		vio_ctrleof(&sc->sc_vq[VQCTL]);
696 	vio_tx_drain(sc);
697 	if (disable)
698 		vio_rx_drain(sc);
699 
700 	virtio_reinit_start(vsc);
701 	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
702 	virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
703 	if (vsc->sc_nvqs >= 3)
704 		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
705 	virtio_reinit_end(vsc);
706 	if (vsc->sc_nvqs >= 3) {
707 		if (sc->sc_ctrl_inuse != FREE)
708 			sc->sc_ctrl_inuse = RESET;
709 		wakeup(&sc->sc_ctrl_inuse);
710 	}
711 }
712 
713 void
714 vio_start(struct ifnet *ifp)
715 {
716 	struct vio_softc *sc = ifp->if_softc;
717 	struct virtio_softc *vsc = sc->sc_virtio;
718 	struct virtqueue *vq = &sc->sc_vq[VQTX];
719 	struct mbuf *m;
720 	int queued = 0;
721 
722 	vio_txeof(vq);
723 
724 	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
725 		return;
726 	if (ifq_empty(&ifp->if_snd))
727 		return;
728 
729 again:
730 	for (;;) {
731 		int slot, r;
732 		struct virtio_net_hdr *hdr;
733 
734 		m = ifq_deq_begin(&ifp->if_snd);
735 		if (m == NULL)
736 			break;
737 
738 		r = virtio_enqueue_prep(vq, &slot);
739 		if (r == EAGAIN) {
740 			ifq_deq_rollback(&ifp->if_snd, m);
741 			ifq_set_oactive(&ifp->if_snd);
742 			break;
743 		}
744 		if (r != 0)
745 			panic("enqueue_prep for a tx buffer: %d", r);
746 
747 		hdr = &sc->sc_tx_hdrs[slot];
748 		memset(hdr, 0, sc->sc_hdr_size);
749 		if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
750 			struct mbuf *mip;
751 			struct ip *ip;
752 			int ehdrlen = ETHER_HDR_LEN;
753 			int ipoff;
754 #if NVLAN > 0
755 			struct ether_vlan_header *eh;
756 
757 			eh = mtod(m, struct ether_vlan_header *);
758 			if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
759 				ehdrlen += ETHER_VLAN_ENCAP_LEN;
760 #endif
761 
762 			if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
763 				hdr->csum_offset = offsetof(struct tcphdr, th_sum);
764 			else
765 				hdr->csum_offset = offsetof(struct udphdr, uh_sum);
766 
767 			mip = m_getptr(m, ehdrlen, &ipoff);
768 			KASSERT(mip != NULL && mip->m_len - ipoff >= sizeof(*ip));
769 			ip = (struct ip *)(mip->m_data + ipoff);
770 			hdr->csum_start = ehdrlen + (ip->ip_hl << 2);
771 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
772 		}
773 
774 		r = vio_encap(sc, slot, m);
775 		if (r != 0) {
776 			virtio_enqueue_abort(vq, slot);
777 			ifq_deq_commit(&ifp->if_snd, m);
778 			m_freem(m);
779 			ifp->if_oerrors++;
780 			continue;
781 		}
782 		r = virtio_enqueue_reserve(vq, slot,
783 		    sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
784 		if (r != 0) {
785 			bus_dmamap_unload(vsc->sc_dmat,
786 			    sc->sc_tx_dmamaps[slot]);
787 			ifq_deq_rollback(&ifp->if_snd, m);
788 			sc->sc_tx_mbufs[slot] = NULL;
789 			ifq_set_oactive(&ifp->if_snd);
790 			break;
791 		}
792 		ifq_deq_commit(&ifp->if_snd, m);
793 
794 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
795 		    sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE);
796 		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
797 		    BUS_DMASYNC_PREWRITE);
798 		VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sc->sc_hdr_size, 1);
799 		virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1);
800 		virtio_enqueue_commit(vsc, vq, slot, 0);
801 		queued++;
802 #if NBPFILTER > 0
803 		if (ifp->if_bpf)
804 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
805 #endif
806 	}
807 	if (ifq_is_oactive(&ifp->if_snd)) {
808 		int r;
809 		if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
810 			r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]);
811 		else
812 			r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]);
813 		if (r) {
814 			vio_txeof(vq);
815 			goto again;
816 		}
817 	}
818 
819 	if (queued > 0) {
820 		virtio_notify(vsc, vq);
821 		timeout_add_sec(&sc->sc_txtick, 1);
822 	}
823 }
824 
#if VIRTIO_DEBUG
/*
 * Debug helper: print the state of the tx and rx virtqueues, whether the
 * tx/rx tick timeouts are currently scheduled and, when a control queue
 * exists, its state and the sc_ctrl_inuse value.
 */
void
vio_dump(struct vio_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct virtio_softc *vsc = sc->sc_virtio;

	printf("%s status dump:\n", ifp->if_xname);
	printf("TX virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQTX]);
	/*
	 * A tick is "active" while it is scheduled.  timeout_pending()
	 * reports exactly that, whereas !timeout_triggered() also holds for
	 * a timeout that was never added or has just been deleted.
	 */
	printf("tx tick active: %d\n", timeout_pending(&sc->sc_txtick) != 0);
	printf("rx tick active: %d\n", timeout_pending(&sc->sc_rxtick) != 0);
	printf("RX virtqueue:\n");
	virtio_vq_dump(&vsc->sc_vqs[VQRX]);
	if (vsc->sc_nvqs == 3) {
		printf("CTL virtqueue:\n");
		virtio_vq_dump(&vsc->sc_vqs[VQCTL]);
		printf("ctrl_inuse: %d\n", sc->sc_ctrl_inuse);
	}
}
#endif
846 
847 int
848 vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
849 {
850 	struct vio_softc *sc = ifp->if_softc;
851 	struct ifreq *ifr = (struct ifreq *)data;
852 	int s, r = 0;
853 
854 	s = splnet();
855 	switch (cmd) {
856 	case SIOCSIFADDR:
857 		ifp->if_flags |= IFF_UP;
858 		if (!(ifp->if_flags & IFF_RUNNING))
859 			vio_init(ifp);
860 		break;
861 	case SIOCSIFFLAGS:
862 		if (ifp->if_flags & IFF_UP) {
863 #if VIRTIO_DEBUG
864 			if (ifp->if_flags & IFF_DEBUG)
865 				vio_dump(sc);
866 #endif
867 			if (ifp->if_flags & IFF_RUNNING)
868 				r = ENETRESET;
869 			else
870 				vio_init(ifp);
871 		} else {
872 			if (ifp->if_flags & IFF_RUNNING)
873 				vio_stop(ifp, 1);
874 		}
875 		break;
876 	case SIOCGIFMEDIA:
877 	case SIOCSIFMEDIA:
878 		r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
879 		break;
880 	case SIOCGIFRXR:
881 		r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
882 		    NULL, MCLBYTES, &sc->sc_rx_ring);
883 		break;
884 	default:
885 		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
886 	}
887 
888 	if (r == ENETRESET) {
889 		if (ifp->if_flags & IFF_RUNNING)
890 			vio_iff(sc);
891 		r = 0;
892 	}
893 	splx(s);
894 	return r;
895 }
896 
/*
 * Receive implementation
 */
900 /* allocate and initialize a mbuf for receive */
901 int
902 vio_add_rx_mbuf(struct vio_softc *sc, int i)
903 {
904 	struct mbuf *m;
905 	int r;
906 
907 	m = MCLGETL(NULL, M_DONTWAIT, MCLBYTES);
908 	if (m == NULL)
909 		return ENOBUFS;
910 	sc->sc_rx_mbufs[i] = m;
911 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
912 	r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i],
913 	    m, BUS_DMA_READ|BUS_DMA_NOWAIT);
914 	if (r) {
915 		m_freem(m);
916 		sc->sc_rx_mbufs[i] = 0;
917 		return r;
918 	}
919 
920 	return 0;
921 }
922 
923 /* free a mbuf for receive */
924 void
925 vio_free_rx_mbuf(struct vio_softc *sc, int i)
926 {
927 	bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
928 	m_freem(sc->sc_rx_mbufs[i]);
929 	sc->sc_rx_mbufs[i] = NULL;
930 }
931 
932 /* add mbufs for all the empty receive slots */
933 void
934 vio_populate_rx_mbufs(struct vio_softc *sc)
935 {
936 	struct virtio_softc *vsc = sc->sc_virtio;
937 	int r, done = 0;
938 	u_int slots;
939 	struct virtqueue *vq = &sc->sc_vq[VQRX];
940 	int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);
941 
942 	for (slots = if_rxr_get(&sc->sc_rx_ring, vq->vq_num);
943 	    slots > 0; slots--) {
944 		int slot;
945 		r = virtio_enqueue_prep(vq, &slot);
946 		if (r == EAGAIN)
947 			break;
948 		if (r != 0)
949 			panic("enqueue_prep for rx buffers: %d", r);
950 		if (sc->sc_rx_mbufs[slot] == NULL) {
951 			r = vio_add_rx_mbuf(sc, slot);
952 			if (r != 0) {
953 				virtio_enqueue_abort(vq, slot);
954 				break;
955 			}
956 		}
957 		r = virtio_enqueue_reserve(vq, slot,
958 		    sc->sc_rx_dmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1));
959 		if (r != 0) {
960 			vio_free_rx_mbuf(sc, slot);
961 			break;
962 		}
963 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
964 		    MCLBYTES, BUS_DMASYNC_PREREAD);
965 		if (mrg_rxbuf) {
966 			virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0);
967 		} else {
968 			/*
969 			 * Buggy kvm wants a buffer of exactly the size of
970 			 * the header in this case, so we have to split in
971 			 * two.
972 			 */
973 			virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
974 			    0, sc->sc_hdr_size, 0);
975 			virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
976 			    sc->sc_hdr_size, MCLBYTES - sc->sc_hdr_size, 0);
977 		}
978 		virtio_enqueue_commit(vsc, vq, slot, 0);
979 		done = 1;
980 	}
981 	if_rxr_put(&sc->sc_rx_ring, slots);
982 
983 	if (done)
984 		virtio_notify(vsc, vq);
985 	timeout_add_sec(&sc->sc_rxtick, 1);
986 }
987 
988 /* dequeue received packets */
989 int
990 vio_rxeof(struct vio_softc *sc)
991 {
992 	struct virtio_softc *vsc = sc->sc_virtio;
993 	struct virtqueue *vq = &sc->sc_vq[VQRX];
994 	struct ifnet *ifp = &sc->sc_ac.ac_if;
995 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
996 	struct mbuf *m, *m0 = NULL, *mlast;
997 	int r = 0;
998 	int slot, len, bufs_left;
999 	struct virtio_net_hdr *hdr;
1000 
1001 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1002 		r = 1;
1003 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
1004 		    MCLBYTES, BUS_DMASYNC_POSTREAD);
1005 		m = sc->sc_rx_mbufs[slot];
1006 		KASSERT(m != NULL);
1007 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
1008 		sc->sc_rx_mbufs[slot] = NULL;
1009 		virtio_dequeue_commit(vq, slot);
1010 		if_rxr_put(&sc->sc_rx_ring, 1);
1011 		m->m_len = m->m_pkthdr.len = len;
1012 		m->m_pkthdr.csum_flags = 0;
1013 		if (m0 == NULL) {
1014 			hdr = mtod(m, struct virtio_net_hdr *);
1015 			m_adj(m, sc->sc_hdr_size);
1016 			m0 = mlast = m;
1017 			if (VIO_HAVE_MRG_RXBUF(sc))
1018 				bufs_left = hdr->num_buffers - 1;
1019 			else
1020 				bufs_left = 0;
1021 		} else {
1022 			m->m_flags &= ~M_PKTHDR;
1023 			m0->m_pkthdr.len += m->m_len;
1024 			mlast->m_next = m;
1025 			mlast = m;
1026 			bufs_left--;
1027 		}
1028 
1029 		if (bufs_left == 0) {
1030 			ml_enqueue(&ml, m0);
1031 			m0 = NULL;
1032 		}
1033 	}
1034 	if (m0 != NULL) {
1035 		DPRINTF("%s: expected %d buffers, got %d\n", __func__,
1036 		    (int)hdr->num_buffers,
1037 		    (int)hdr->num_buffers - bufs_left);
1038 		ifp->if_ierrors++;
1039 		m_freem(m0);
1040 	}
1041 
1042 	if (ifiq_input(&ifp->if_rcv, &ml))
1043 		if_rxr_livelocked(&sc->sc_rx_ring);
1044 
1045 	return r;
1046 }
1047 
1048 int
1049 vio_rx_intr(struct virtqueue *vq)
1050 {
1051 	struct virtio_softc *vsc = vq->vq_owner;
1052 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1053 	int r, sum = 0;
1054 
1055 again:
1056 	r = vio_rxeof(sc);
1057 	sum += r;
1058 	if (r) {
1059 		vio_populate_rx_mbufs(sc);
1060 		/* set used event index to the next slot */
1061 		if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) {
1062 			if (virtio_start_vq_intr(vq->vq_owner, vq))
1063 				goto again;
1064 		}
1065 	}
1066 
1067 	return sum;
1068 }
1069 
1070 void
1071 vio_rxtick(void *arg)
1072 {
1073 	struct virtqueue *vq = arg;
1074 	struct virtio_softc *vsc = vq->vq_owner;
1075 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1076 	int s;
1077 
1078 	s = splnet();
1079 	vio_populate_rx_mbufs(sc);
1080 	splx(s);
1081 }
1082 
1083 /* free all the mbufs; called from if_stop(disable) */
1084 void
1085 vio_rx_drain(struct vio_softc *sc)
1086 {
1087 	struct virtqueue *vq = &sc->sc_vq[VQRX];
1088 	int i;
1089 
1090 	for (i = 0; i < vq->vq_num; i++) {
1091 		if (sc->sc_rx_mbufs[i] == NULL)
1092 			continue;
1093 		vio_free_rx_mbuf(sc, i);
1094 	}
1095 }
1096 
1097 /*
 * Transmission implementation
1099  */
1100 /* actual transmission is done in if_start */
1101 /* tx interrupt; dequeue and free mbufs */
1102 /*
1103  * tx interrupt is actually disabled unless the tx queue is full, i.e.
1104  * IFF_OACTIVE is set. vio_txtick is used to make sure that mbufs
1105  * are dequeued and freed even if no further transfer happens.
1106  */
1107 int
1108 vio_tx_intr(struct virtqueue *vq)
1109 {
1110 	struct virtio_softc *vsc = vq->vq_owner;
1111 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1112 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1113 	int r;
1114 
1115 	r = vio_txeof(vq);
1116 	vio_start(ifp);
1117 	return r;
1118 }
1119 
/*
 * tx timeout callback: run the tx interrupt handler at splnet.
 * arg is the tx virtqueue the timeout was set up with.
 */
void
vio_txtick(void *arg)
{
	struct virtqueue *txq = arg;
	int ipl;

	ipl = splnet();
	vio_tx_intr(txq);
	splx(ipl);
}
1128 
1129 int
1130 vio_txeof(struct virtqueue *vq)
1131 {
1132 	struct virtio_softc *vsc = vq->vq_owner;
1133 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1134 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1135 	struct mbuf *m;
1136 	int r = 0;
1137 	int slot, len;
1138 
1139 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
1140 		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
1141 		r++;
1142 		VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
1143 		    BUS_DMASYNC_POSTWRITE);
1144 		bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
1145 		    sc->sc_tx_dmamaps[slot]->dm_mapsize,
1146 		    BUS_DMASYNC_POSTWRITE);
1147 		m = sc->sc_tx_mbufs[slot];
1148 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
1149 		sc->sc_tx_mbufs[slot] = 0;
1150 		virtio_dequeue_commit(vq, slot);
1151 		m_freem(m);
1152 	}
1153 
1154 	if (r) {
1155 		ifq_clr_oactive(&ifp->if_snd);
1156 		virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
1157 	}
1158 	if (vq->vq_used_idx == vq->vq_avail_idx)
1159 		timeout_del(&sc->sc_txtick);
1160 	else if (r)
1161 		timeout_add_sec(&sc->sc_txtick, 1);
1162 	return r;
1163 }
1164 
1165 int
1166 vio_encap(struct vio_softc *sc, int slot, struct mbuf *m)
1167 {
1168 	struct virtio_softc	*vsc = sc->sc_virtio;
1169 	bus_dmamap_t		 dmap= sc->sc_tx_dmamaps[slot];
1170 	int			 r;
1171 
1172 	r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
1173 	    BUS_DMA_WRITE|BUS_DMA_NOWAIT);
1174 	switch (r) {
1175 	case 0:
1176 		break;
1177 	case EFBIG:
1178 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1179 		    bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
1180 		    BUS_DMA_WRITE|BUS_DMA_NOWAIT) == 0)
1181 			break;
1182 
1183 		/* FALLTHROUGH */
1184 	default:
1185 		return ENOBUFS;
1186 	}
1187 	sc->sc_tx_mbufs[slot] = m;
1188 	return 0;
1189 }
1190 
1191 /* free all the mbufs already put on vq; called from if_stop(disable) */
1192 void
1193 vio_tx_drain(struct vio_softc *sc)
1194 {
1195 	struct virtio_softc *vsc = sc->sc_virtio;
1196 	struct virtqueue *vq = &sc->sc_vq[VQTX];
1197 	int i;
1198 
1199 	for (i = 0; i < vq->vq_num; i++) {
1200 		if (sc->sc_tx_mbufs[i] == NULL)
1201 			continue;
1202 		bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
1203 		m_freem(sc->sc_tx_mbufs[i]);
1204 		sc->sc_tx_mbufs[i] = NULL;
1205 	}
1206 }
1207 
1208 /*
1209  * Control vq
1210  */
1211 /* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
1212 int
1213 vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
1214 {
1215 	struct virtio_softc *vsc = sc->sc_virtio;
1216 	struct virtqueue *vq = &sc->sc_vq[VQCTL];
1217 	int r, slot;
1218 
1219 	splassert(IPL_NET);
1220 
1221 	if ((r = vio_wait_ctrl(sc)) != 0)
1222 		return r;
1223 
1224 	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
1225 	sc->sc_ctrl_cmd->command = cmd;
1226 	sc->sc_ctrl_rx->onoff = onoff;
1227 
1228 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1229 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
1230 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
1231 	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
1232 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1233 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
1234 
1235 	r = virtio_enqueue_prep(vq, &slot);
1236 	if (r != 0)
1237 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1238 	r = virtio_enqueue_reserve(vq, slot, 3);
1239 	if (r != 0)
1240 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1241 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
1242 	    sizeof(*sc->sc_ctrl_cmd), 1);
1243 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
1244 	    sizeof(*sc->sc_ctrl_rx), 1);
1245 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
1246 	    sizeof(*sc->sc_ctrl_status), 0);
1247 	virtio_enqueue_commit(vsc, vq, slot, 1);
1248 
1249 	if ((r = vio_wait_ctrl_done(sc)) != 0)
1250 		goto out;
1251 
1252 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1253 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
1254 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
1255 	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
1256 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1257 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
1258 
1259 	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
1260 		r = 0;
1261 	} else {
1262 		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
1263 		r = EIO;
1264 	}
1265 
1266 	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
1267 out:
1268 	vio_ctrl_wakeup(sc, FREE);
1269 	return r;
1270 }
1271 
1272 /*
1273  * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
1274  * NET_LOCK() deadlocks are possible.  So release it here.
1275  */
1276 static inline int
1277 vio_sleep(struct vio_softc *sc, const char *wmesg)
1278 {
1279 	int status = rw_status(&netlock);
1280 
1281 	if (status != RW_WRITE && status != RW_READ)
1282 		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
1283 		    INFSLP);
1284 
1285 	return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
1286 	    INFSLP);
1287 }
1288 
1289 int
1290 vio_wait_ctrl(struct vio_softc *sc)
1291 {
1292 	int r = 0;
1293 
1294 	while (sc->sc_ctrl_inuse != FREE) {
1295 		r = vio_sleep(sc, "viowait");
1296 		if (r == EINTR)
1297 			return r;
1298 	}
1299 	sc->sc_ctrl_inuse = INUSE;
1300 
1301 	return r;
1302 }
1303 
1304 int
1305 vio_wait_ctrl_done(struct vio_softc *sc)
1306 {
1307 	int r = 0;
1308 
1309 	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
1310 		if (sc->sc_ctrl_inuse == RESET) {
1311 			r = 1;
1312 			break;
1313 		}
1314 		r = vio_sleep(sc, "viodone");
1315 		if (r == EINTR)
1316 			break;
1317 	}
1318 	return r;
1319 }
1320 
1321 void
1322 vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
1323 {
1324 	sc->sc_ctrl_inuse = new;
1325 	wakeup(&sc->sc_ctrl_inuse);
1326 }
1327 
1328 int
1329 vio_ctrleof(struct virtqueue *vq)
1330 {
1331 	struct virtio_softc *vsc = vq->vq_owner;
1332 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
1333 	int r = 0, ret, slot;
1334 
1335 again:
1336 	ret = virtio_dequeue(vsc, vq, &slot, NULL);
1337 	if (ret == ENOENT)
1338 		return r;
1339 	virtio_dequeue_commit(vq, slot);
1340 	r++;
1341 	vio_ctrl_wakeup(sc, DONE);
1342 	if (virtio_start_vq_intr(vsc, vq))
1343 		goto again;
1344 
1345 	return r;
1346 }
1347 
1348 /* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
1349 int
1350 vio_set_rx_filter(struct vio_softc *sc)
1351 {
1352 	/* filter already set in sc_ctrl_mac_tbl */
1353 	struct virtio_softc *vsc = sc->sc_virtio;
1354 	struct virtqueue *vq = &sc->sc_vq[VQCTL];
1355 	int r, slot;
1356 
1357 	splassert(IPL_NET);
1358 
1359 	if ((r = vio_wait_ctrl(sc)) != 0)
1360 		return r;
1361 
1362 	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
1363 	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1364 
1365 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1366 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
1367 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
1368 	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_PREWRITE);
1369 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1370 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
1371 
1372 	r = virtio_enqueue_prep(vq, &slot);
1373 	if (r != 0)
1374 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1375 	r = virtio_enqueue_reserve(vq, slot, 4);
1376 	if (r != 0)
1377 		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
1378 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
1379 	    sizeof(*sc->sc_ctrl_cmd), 1);
1380 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
1381 	    sizeof(*sc->sc_ctrl_mac_tbl_uc) +
1382 	    sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
1383 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
1384 	    sizeof(*sc->sc_ctrl_mac_tbl_mc) +
1385 	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
1386 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
1387 	    sizeof(*sc->sc_ctrl_status), 0);
1388 	virtio_enqueue_commit(vsc, vq, slot, 1);
1389 
1390 	if ((r = vio_wait_ctrl_done(sc)) != 0)
1391 		goto out;
1392 
1393 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
1394 	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
1395 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
1396 	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
1397 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
1398 	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
1399 
1400 	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
1401 		r = 0;
1402 	} else {
1403 		/* The host's filter table is not large enough */
1404 		printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
1405 		r = EIO;
1406 	}
1407 
1408 out:
1409 	vio_ctrl_wakeup(sc, FREE);
1410 	return r;
1411 }
1412 
1413 void
1414 vio_iff(struct vio_softc *sc)
1415 {
1416 	struct virtio_softc *vsc = sc->sc_virtio;
1417 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1418 	struct arpcom *ac = &sc->sc_ac;
1419 	struct ether_multi *enm;
1420 	struct ether_multistep step;
1421 	int nentries = 0;
1422 	int promisc = 0, allmulti = 0, rxfilter = 0;
1423 	int r;
1424 
1425 	splassert(IPL_NET);
1426 
1427 	ifp->if_flags &= ~IFF_ALLMULTI;
1428 
1429 	if (vsc->sc_nvqs < 3) {
1430 		/* no ctrl vq; always promisc */
1431 		ifp->if_flags |= IFF_ALLMULTI | IFF_PROMISC;
1432 		return;
1433 	}
1434 
1435 	if (sc->sc_dev.dv_cfdata->cf_flags & CONFFLAG_QEMU_VLAN_BUG)
1436 		ifp->if_flags |= IFF_PROMISC;
1437 
1438 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1439 	    ac->ac_multicnt >= VIRTIO_NET_CTRL_MAC_MC_ENTRIES) {
1440 		ifp->if_flags |= IFF_ALLMULTI;
1441 		if (ifp->if_flags & IFF_PROMISC)
1442 			promisc = 1;
1443 		else
1444 			allmulti = 1;
1445 	} else {
1446 		rxfilter = 1;
1447 
1448 		ETHER_FIRST_MULTI(step, ac, enm);
1449 		while (enm != NULL) {
1450 			memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries++],
1451 			    enm->enm_addrlo, ETHER_ADDR_LEN);
1452 
1453 			ETHER_NEXT_MULTI(step, enm);
1454 		}
1455 	}
1456 
1457 	/* set unicast address, VirtualBox wants that */
1458 	memcpy(sc->sc_ctrl_mac_tbl_uc->macs[0], ac->ac_enaddr, ETHER_ADDR_LEN);
1459 	sc->sc_ctrl_mac_tbl_uc->nentries = 1;
1460 
1461 	sc->sc_ctrl_mac_tbl_mc->nentries = rxfilter ? nentries : 0;
1462 
1463 	if (vsc->sc_nvqs < 3)
1464 		return;
1465 
1466 	r = vio_set_rx_filter(sc);
1467 	if (r == EIO)
1468 		allmulti = 1; /* fallback */
1469 	else if (r != 0)
1470 		return;
1471 
1472 	r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, allmulti);
1473 	if (r == EIO)
1474 		promisc = 1; /* fallback */
1475 	else if (r != 0)
1476 		return;
1477 
1478 	vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, promisc);
1479 }
1480