/*	$OpenBSD: virtio.c,v 1.20 2021/05/16 15:10:20 deraadt Exp $	*/
/*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/

/*
 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/malloc.h>

#include <dev/pv/virtioreg.h>
#include <dev/pv/virtiovar.h>

#if VIRTIO_DEBUG
#define VIRTIO_ASSERT(x)	KASSERT(x)
#else
#define VIRTIO_ASSERT(x)
#endif

void		 virtio_init_vq(struct virtio_softc *,
				struct virtqueue *);
void		 vq_free_entry(struct virtqueue *, struct vq_entry *);
struct vq_entry	*vq_alloc_entry(struct virtqueue *);

struct cfdriver virtio_cd = {
	NULL, "virtio", DV_DULL
};

static const char * const virtio_device_name[] = {
	"Unknown (0)",		/* 0 */
	"Network",		/* 1 */
	"Block",		/* 2 */
	"Console",		/* 3 */
	"Entropy",		/* 4 */
	"Memory Balloon",	/* 5 */
	"IO Memory",		/* 6 */
	"Rpmsg",		/* 7 */
	"SCSI host",		/* 8 */
	"9P Transport",		/* 9 */
	"mac80211 wlan"		/* 10 */
};
#define NDEVNAMES	(sizeof(virtio_device_name)/sizeof(char*))

const char *
virtio_device_string(int id)
{
	return id < NDEVNAMES ? virtio_device_name[id] : "Unknown";
}

#if VIRTIO_DEBUG
static const struct virtio_feature_name transport_feature_names[] = {
	{ VIRTIO_F_NOTIFY_ON_EMPTY,	"NotifyOnEmpty"},
	{ VIRTIO_F_RING_INDIRECT_DESC,	"RingIndirectDesc"},
	{ VIRTIO_F_RING_EVENT_IDX,	"RingEventIdx"},
	{ VIRTIO_F_BAD_FEATURE,		"BadFeature"},
	{ VIRTIO_F_VERSION_1,		"Version1"},
	{ 0,				NULL}
};

void
virtio_log_features(uint64_t host, uint64_t neg,
    const struct virtio_feature_name *guest_feature_names)
{
	const struct virtio_feature_name *namep;
	int i;
	char c;
	uint64_t bit;

	for (i = 0; i < 64; i++) {
		if (i == 30) {
			/*
			 * VIRTIO_F_BAD_FEATURE is only used for
			 * checking correct negotiation
			 */
			continue;
		}
		bit = 1ULL << i;
		if ((host&bit) == 0)
			continue;
		namep = (i < 24 || i > 37) ? guest_feature_names :
		    transport_feature_names;
		while (namep->bit && namep->bit != bit)
			namep++;
		c = (neg&bit) ? '+' : '-';
		if (namep->name)
			printf(" %c%s", c, namep->name);
		else
			printf(" %cUnknown(%d)", c, i);
	}
}
#endif
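
/*
 * Example (a sketch only; FOO_F_SOMETHING, foo_feature_names and
 * host_features are hypothetical names): a child driver passes its own
 * guest feature name table so that both transport and device features
 * are printed after negotiation.
 *
 *	static const struct virtio_feature_name foo_feature_names[] = {
 *		{ FOO_F_SOMETHING,	"Something" },
 *		{ 0,			NULL }
 *	};
 *
 *	// host_features = bits offered by the device
 *	virtio_log_features(host_features, vsc->sc_active_features,
 *	    foo_feature_names);
 */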

/*
 * Reset the device.
 */
/*
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() can still be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, features are assumed to not change again.
 */
void
virtio_reset(struct virtio_softc *sc)
{
	virtio_device_reset(sc);
	sc->sc_active_features = 0;
}

void
virtio_reinit_start(struct virtio_softc *sc)
{
	int i;

	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
	virtio_negotiate_features(sc, NULL);
	for (i = 0; i < sc->sc_nvqs; i++) {
		int n;
		struct virtqueue *vq = &sc->sc_vqs[i];
		n = virtio_read_queue_size(sc, vq->vq_index);
		if (n == 0)	/* vq disappeared */
			continue;
		if (n != vq->vq_num) {
			panic("%s: virtqueue size changed, vq index %d",
			    sc->sc_dev.dv_xname, vq->vq_index);
		}
		virtio_init_vq(sc, vq);
		virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
	}
}

void
virtio_reinit_end(struct virtio_softc *sc)
{
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
}
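
/*
 * Example of the reset/reinit sequence described above, e.g. from a
 * hypothetical child driver's resume path (a sketch only; foo_fill_rx_ring
 * is an illustrative name, and sc->sc_virtio is assumed to point to the
 * transport's virtio_softc):
 *
 *	struct virtio_softc *vsc = sc->sc_virtio;
 *
 *	virtio_reset(vsc);		// stop device activity
 *	// dequeue finished requests, revoke anything still pending
 *	virtio_reinit_start(vsc);	// re-negotiate and re-init the vqs
 *	foo_fill_rx_ring(sc);		// driver specific re-initialization
 *	virtio_reinit_end(vsc);		// sets DRIVER_OK, enqueue allowed
 */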

/*
 * dmamap sync operations for a virtqueue.
 */
static inline void
vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	/* availoffset == sizeof(vring_desc)*vq_num */
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
	    ops);
}

static inline void
vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_availoffset,
	    offsetof(struct vring_avail, ring) + vq->vq_num * sizeof(uint16_t),
	    ops);
}

static inline void
vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_usedoffset,
	    offsetof(struct vring_used, ring) + vq->vq_num *
	    sizeof(struct vring_used_elem), ops);
}

static inline void
vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
    int ops)
{
	int offset = vq->vq_indirectoffset +
	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;

	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, offset,
	    sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
}

/*
 * Scan the vqs, bus_dmamap_sync their rings (not the payload buffers),
 * and call (*vq_done)() if some entries have been consumed.
 * For use in transport specific irq handlers.
 */
int
virtio_check_vqs(struct virtio_softc *sc)
{
	struct virtqueue *vq;
	int i, r = 0;

	/* going backwards is better for if_vio */
	for (i = sc->sc_nvqs - 1; i >= 0; i--) {
		vq = &sc->sc_vqs[i];
		if (vq->vq_queued) {
			vq->vq_queued = 0;
			vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
		}
		vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
		if (vq->vq_used_idx != vq->vq_used->idx) {
			if (vq->vq_done)
				r |= (vq->vq_done)(vq);
		}
	}

	return r;
}
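
/*
 * Example (a sketch only, hypothetical "foo" transport): a transport
 * specific interrupt handler is expected to call virtio_check_vqs() and
 * report whether any queue had work:
 *
 *	int
 *	foo_transport_intr(void *arg)
 *	{
 *		struct virtio_softc *vsc = arg;
 *
 *		// read/acknowledge the transport's ISR status here, then:
 *		return virtio_check_vqs(vsc);
 *	}
 */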

/*
 * Initialize vq structure.
 */
void
virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	int i, j;
	int vq_size = vq->vq_num;

	memset(vq->vq_vaddr, 0, vq->vq_bytesize);

	/* build the indirect descriptor chain */
	if (vq->vq_indirect != NULL) {
		struct vring_desc *vd;

		for (i = 0; i < vq_size; i++) {
			vd = vq->vq_indirect;
			vd += vq->vq_maxnsegs * i;
			for (j = 0; j < vq->vq_maxnsegs-1; j++)
				vd[j].next = j + 1;
		}
	}

	/* free slot management */
	SLIST_INIT(&vq->vq_freelist);
	/*
	 * virtio_enqueue_trim needs monotonically increasing entries,
	 * therefore initialize in reverse order
	 */
	for (i = vq_size - 1; i >= 0; i--) {
		SLIST_INSERT_HEAD(&vq->vq_freelist, &vq->vq_entries[i],
		    qe_list);
		vq->vq_entries[i].qe_index = i;
	}

	/* enqueue/dequeue status */
	vq->vq_avail_idx = 0;
	vq->vq_used_idx = 0;
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
	vq->vq_queued = 1;
}

/*
 * Allocate/free a vq.
 *
 * maxnsegs denotes how much space should be allocated for indirect
 * descriptors. maxnsegs == 1 can be used to disable the use of indirect
 * descriptors for this queue.
 */
int
virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
    int maxsegsize, int maxnsegs, const char *name)
{
	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
	int rsegs, r, hdrlen;
#define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
				 ~(VIRTIO_PAGE_SIZE-1))

	memset(vq, 0, sizeof(*vq));

	vq_size = virtio_read_queue_size(sc, index);
	if (vq_size == 0) {
		printf("virtqueue %d for %s does not exist\n", index, name);
		goto err;
	}
	if (((vq_size - 1) & vq_size) != 0)
		panic("vq_size not power of two: %d", vq_size);

	hdrlen = virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX) ? 3 : 2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (hdrlen + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
	    + sizeof(struct vring_used_elem) * vq_size);
	/* allocsize3: indirect table */
	if (sc->sc_indirect && maxnsegs > 1)
		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
	else
		allocsize3 = 0;
	allocsize = allocsize1 + allocsize2 + allocsize3;

	/* alloc and map the memory */
	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s allocation failed, error %d\n",
		       index, name, r);
		goto err;
	}
	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
	    (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s map failed, error %d\n", index,
		    name, r);
		goto err;
	}
	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
	    BUS_DMA_NOWAIT, &vq->vq_dmamap);
	if (r != 0) {
		printf("virtqueue %d for %s dmamap creation failed, "
		    "error %d\n", index, name, r);
		goto err;
	}
	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap, vq->vq_vaddr,
	    allocsize, NULL, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s dmamap load failed, error %d\n",
		    index, name, r);
		goto err;
	}

	/* remember addresses and offsets for later use */
	vq->vq_owner = sc;
	vq->vq_num = vq_size;
	vq->vq_mask = vq_size - 1;
	vq->vq_index = index;
	vq->vq_desc = vq->vq_vaddr;
	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
	vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) +
	    vq->vq_availoffset);
	vq->vq_usedoffset = allocsize1;
	vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) +
	    vq->vq_usedoffset);
	if (allocsize3 > 0) {
		vq->vq_indirectoffset = allocsize1 + allocsize2;
		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
		    + vq->vq_indirectoffset);
	}
	vq->vq_bytesize = allocsize;
	vq->vq_maxnsegs = maxnsegs;

	/* free slot management */
	vq->vq_entries = mallocarray(vq_size, sizeof(struct vq_entry),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq->vq_entries == NULL) {
		r = ENOMEM;
		goto err;
	}

	virtio_init_vq(sc, vq);
	virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);

#if VIRTIO_DEBUG
	printf("\nallocated %d bytes for virtqueue %d for %s, size %d\n",
	    allocsize, index, name, vq_size);
	if (allocsize3 > 0)
		printf("using %d bytes (%d entries) for indirect descriptors\n",
		    allocsize3, maxnsegs * vq_size);
#endif
	return 0;

err:
	if (vq->vq_dmamap)
		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	if (vq->vq_vaddr)
		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
	if (vq->vq_segs[0].ds_addr)
		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return -1;
}
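
/*
 * Example (a sketch only; the "foo" names are hypothetical): a child
 * driver typically allocates its virtqueues in attach and installs a
 * vq_done callback for virtio_check_vqs() to invoke:
 *
 *	if (virtio_alloc_vq(vsc, &sc->sc_vq, 0, MAXPHYS, 2, "foo requests")
 *	    != 0) {
 *		printf(": can't alloc virtqueue\n");
 *		return;
 *	}
 *	sc->sc_vq.vq_done = foo_vq_done;
 *	virtio_start_vq_intr(vsc, &sc->sc_vq);
 */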

int
virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	struct vq_entry *qe;
	int i = 0;

	/* device must already be deactivated */
	/* confirm the vq is empty */
	SLIST_FOREACH(qe, &vq->vq_freelist, qe_list) {
		i++;
	}
	if (i != vq->vq_num) {
		printf("%s: freeing non-empty vq, index %d\n",
		    sc->sc_dev.dv_xname, vq->vq_index);
		return EBUSY;
	}

	/* tell device that there's no virtqueue any longer */
	virtio_setup_queue(sc, vq, 0);

	free(vq->vq_entries, M_DEVBUF, 0);
	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return 0;
}

/*
 * Free descriptor management.
 */
struct vq_entry *
vq_alloc_entry(struct virtqueue *vq)
{
	struct vq_entry *qe;

	if (SLIST_EMPTY(&vq->vq_freelist))
		return NULL;
	qe = SLIST_FIRST(&vq->vq_freelist);
	SLIST_REMOVE_HEAD(&vq->vq_freelist, qe_list);

	return qe;
}

void
vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
{
	SLIST_INSERT_HEAD(&vq->vq_freelist, qe, qe_list);
}

/*
 * Enqueue several dmamaps as a single request.
 */
/*
 * Typical usage:
 *  <queue size> instances of the following are stored in arrays:
 *  - command blocks (in dmamem) should be pre-allocated and mapped
 *  - dmamaps for command blocks should be pre-allocated and loaded
 *  - dmamaps for payload should be pre-allocated
 *	r = virtio_enqueue_prep(vq, &slot);		// allocate a slot
 *	if (r)		// currently 0 or EAGAIN
 *	  return r;
 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
 *	if (r) {
 *	  virtio_enqueue_abort(vq, slot);
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;
 *	}
 *	r = virtio_enqueue_reserve(vq, slot,
 *				   dmamap_payload[slot]->dm_nsegs+1);
 *							// ^ +1 for command
 *	if (r) {	// currently 0 or EAGAIN
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;					// do not call abort()
 *	}
 *	<setup and prepare commands>
 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
 *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
 *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(sc, vq, slot, 1);
 *
 * Alternative usage with statically allocated slots:
 *	<during initialization>
 *	// while not out of slots, do
 *	virtio_enqueue_prep(vq, &slot);			// allocate a slot
 *	virtio_enqueue_reserve(vq, slot, max_segs);	// reserve all slots
 *						that may ever be needed
 *
 *	<when enqueuing a request>
 *	// Don't call virtio_enqueue_prep()
 *	bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
 *	virtio_enqueue_trim(vq, slot, num_segs_needed);
 *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
 *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(sc, vq, slot, 1);
 *
 *	<when dequeuing>
 *	// don't call virtio_dequeue_commit()
 */
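
/*
 * Concrete example of the dynamic-slot variant above (a sketch only; the
 * "foo" names, the command structures and the per-slot dmamap arrays are
 * hypothetical):
 *
 *	int
 *	foo_submit(struct foo_softc *sc, void *data, size_t count, int iswrite)
 *	{
 *		struct virtio_softc *vsc = sc->sc_virtio;
 *		struct virtqueue *vq = &sc->sc_vq;
 *		int slot, r;
 *
 *		r = virtio_enqueue_prep(vq, &slot);
 *		if (r)
 *			return r;
 *		r = bus_dmamap_load(vsc->sc_dmat, sc->sc_dmamap_payload[slot],
 *		    data, count, NULL, BUS_DMA_NOWAIT);
 *		if (r) {
 *			virtio_enqueue_abort(vq, slot);
 *			return r;
 *		}
 *		r = virtio_enqueue_reserve(vq, slot,
 *		    sc->sc_dmamap_payload[slot]->dm_nsegs + 1);
 *		if (r) {
 *			bus_dmamap_unload(vsc->sc_dmat,
 *			    sc->sc_dmamap_payload[slot]);
 *			return r;
 *		}
 *		// fill in sc->sc_cmds[slot], sync the cmd and payload maps
 *		virtio_enqueue(vq, slot, sc->sc_dmamap_cmd[slot], 0);
 *		virtio_enqueue(vq, slot, sc->sc_dmamap_payload[slot], iswrite);
 *		virtio_enqueue_commit(vsc, vq, slot, 1);
 *		return 0;
 *	}
 */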

/*
 * enqueue_prep: allocate a slot number
 */
int
virtio_enqueue_prep(struct virtqueue *vq, int *slotp)
{
	struct vq_entry *qe1;

	VIRTIO_ASSERT(slotp != NULL);

	qe1 = vq_alloc_entry(vq);
	if (qe1 == NULL)
		return EAGAIN;
	/* next slot is not allocated yet */
	qe1->qe_next = -1;
	*slotp = qe1->qe_index;

	return 0;
}

/*
 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
 * Calls virtio_enqueue_abort() on failure.
 */
int
virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];

	VIRTIO_ASSERT(qe1->qe_next == -1);
	VIRTIO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num);

	if (vq->vq_indirect != NULL && nsegs > 1 && nsegs <= vq->vq_maxnsegs) {
		struct vring_desc *vd;
		int i;

		qe1->qe_indirect = 1;

		vd = &vq->vq_desc[qe1->qe_index];
		vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr +
		    vq->vq_indirectoffset;
		vd->addr += sizeof(struct vring_desc) * vq->vq_maxnsegs *
		    qe1->qe_index;
		vd->len = sizeof(struct vring_desc) * nsegs;
		vd->flags = VRING_DESC_F_INDIRECT;

		vd = vq->vq_indirect;
		vd += vq->vq_maxnsegs * qe1->qe_index;
		qe1->qe_desc_base = vd;

		for (i = 0; i < nsegs-1; i++)
			vd[i].flags = VRING_DESC_F_NEXT;
		vd[i].flags = 0;
		qe1->qe_next = 0;

		return 0;
	} else {
		struct vring_desc *vd;
		struct vq_entry *qe;
		int i, s;

		qe1->qe_indirect = 0;

		vd = &vq->vq_desc[0];
		qe1->qe_desc_base = vd;
		qe1->qe_next = qe1->qe_index;
		s = slot;
		for (i = 0; i < nsegs - 1; i++) {
			qe = vq_alloc_entry(vq);
			if (qe == NULL) {
				vd[s].flags = 0;
				virtio_enqueue_abort(vq, slot);
				return EAGAIN;
			}
			vd[s].flags = VRING_DESC_F_NEXT;
			vd[s].next = qe->qe_index;
			s = qe->qe_index;
		}
		vd[s].flags = 0;

		return 0;
	}
}

/*
 * enqueue: enqueue a single dmamap.
 */
int
virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int i;
	int s = qe1->qe_next;

	VIRTIO_ASSERT(s >= 0);
	VIRTIO_ASSERT(dmamap->dm_nsegs > 0);
	if (dmamap->dm_nsegs > vq->vq_maxnsegs) {
#if VIRTIO_DEBUG
		for (i = 0; i < dmamap->dm_nsegs; i++) {
			printf(" %d (%d): %p %lx \n", i, write,
			    (void *)dmamap->dm_segs[i].ds_addr,
			    dmamap->dm_segs[i].ds_len);
		}
#endif
		panic("dmamap->dm_nsegs %d > vq->vq_maxnsegs %d",
		    dmamap->dm_nsegs, vq->vq_maxnsegs);
	}

	for (i = 0; i < dmamap->dm_nsegs; i++) {
		vd[s].addr = dmamap->dm_segs[i].ds_addr;
		vd[s].len = dmamap->dm_segs[i].ds_len;
		if (!write)
			vd[s].flags |= VRING_DESC_F_WRITE;
		s = vd[s].next;
	}
	qe1->qe_next = s;

	return 0;
}

int
virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap,
    bus_addr_t start, bus_size_t len, int write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int s = qe1->qe_next;

	VIRTIO_ASSERT(s >= 0);
	/* XXX todo: handle more segments */
	VIRTIO_ASSERT(dmamap->dm_nsegs == 1);
	VIRTIO_ASSERT((dmamap->dm_segs[0].ds_len > start) &&
	    (dmamap->dm_segs[0].ds_len >= start + len));

	vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
	vd[s].len = len;
	if (!write)
		vd[s].flags |= VRING_DESC_F_WRITE;
	qe1->qe_next = vd[s].next;

	return 0;
}

static void
publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq)
{
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);

	virtio_membar_producer();
	vq->vq_avail->idx = vq->vq_avail_idx;
	vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
	vq->vq_queued = 1;
}

/*
 * enqueue_commit: add it to the aring.
 */
void
virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
    int notifynow)
{
	struct vq_entry *qe1;

	if (slot < 0)
		goto notify;
	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
	qe1 = &vq->vq_entries[slot];
	if (qe1->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
	vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;

notify:
	if (notifynow) {
		if (virtio_has_feature(vq->vq_owner, VIRTIO_F_RING_EVENT_IDX)) {
			uint16_t o = vq->vq_avail->idx;
			uint16_t n = vq->vq_avail_idx;
			uint16_t t;
			publish_avail_idx(sc, vq);

			virtio_membar_sync();
			t = VQ_AVAIL_EVENT(vq) + 1;
			if ((uint16_t)(n - t) < (uint16_t)(n - o))
				sc->sc_ops->kick(sc, vq->vq_index);
		} else {
			publish_avail_idx(sc, vq);

			virtio_membar_sync();
			if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
				sc->sc_ops->kick(sc, vq->vq_index);
		}
	}
}

/*
 * enqueue_abort: rollback.
 */
int
virtio_enqueue_abort(struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd;
	int s;

	if (qe->qe_next < 0) {
		vq_free_entry(vq, qe);
		return 0;
	}

	s = slot;
	vd = &vq->vq_desc[0];
	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry(vq, qe);
	return 0;
}

/*
 * enqueue_trim: adjust buffer size to given # of segments, a.k.a.
 * descriptors.
 */
void
virtio_enqueue_trim(struct virtqueue *vq, int slot, int nsegs)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = &vq->vq_desc[0];
	int i;

	if ((vd[slot].flags & VRING_DESC_F_INDIRECT) == 0) {
		qe1->qe_next = qe1->qe_index;
		/*
		 * N.B.: the vq_entries are ASSUMED to be a contiguous
		 *       block with slot being the index to the first one.
		 */
	} else {
		qe1->qe_next = 0;
		vd = &vq->vq_desc[qe1->qe_index];
		vd->len = sizeof(struct vring_desc) * nsegs;
		vd = qe1->qe_desc_base;
		slot = 0;
	}

	for (i = 0; i < nsegs - 1; i++) {
		vd[slot].flags = VRING_DESC_F_NEXT;
		slot++;
	}
	vd[slot].flags = 0;
}

/*
 * Dequeue a request.
 */
/*
 * dequeue: dequeue a request from uring; dmamap_sync for uring is
 *	    already done in the interrupt handler.
 */
int
virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
    int *slotp, int *lenp)
{
	uint16_t slot, usedidx;
	struct vq_entry *qe;

	if (vq->vq_used_idx == vq->vq_used->idx)
		return ENOENT;
	usedidx = vq->vq_used_idx++;
	usedidx &= vq->vq_mask;

	virtio_membar_consumer();
	slot = vq->vq_used->ring[usedidx].id;
	qe = &vq->vq_entries[slot];

	if (qe->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);

	if (slotp)
		*slotp = slot;
	if (lenp)
		*lenp = vq->vq_used->ring[usedidx].len;

	return 0;
}

/*
 * dequeue_commit: complete dequeue; the slot is recycled for future use.
 *                 If you forget to call this, the slot will be leaked.
 *
 *                 Don't call this if you use statically allocated slots
 *                 and virtio_enqueue_trim().
 */
int
virtio_dequeue_commit(struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd = &vq->vq_desc[0];
	int s = slot;

	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry(vq, qe);

	return 0;
}
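
/*
 * Example vq_done handler (a sketch only; the "foo" names and the per-slot
 * payload dmamaps are hypothetical, and vsc->sc_child is assumed to point
 * at the child driver's softc).  It is called from virtio_check_vqs() with
 * the used ring already synced:
 *
 *	int
 *	foo_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		struct foo_softc *sc = (struct foo_softc *)vsc->sc_child;
 *		int slot, len, r = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			bus_dmamap_sync(vsc->sc_dmat,
 *			    sc->sc_dmamap_payload[slot], 0, len,
 *			    BUS_DMASYNC_POSTREAD);
 *			bus_dmamap_unload(vsc->sc_dmat,
 *			    sc->sc_dmamap_payload[slot]);
 *			// hand the completed request back to the upper layer
 *			virtio_dequeue_commit(vq, slot);
 *			r = 1;
 *		}
 *		return r;
 *	}
 */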

/*
 * Increase the event index in order to delay interrupts.
 * Returns 0 on success; returns 1 if the used ring has already advanced
 * too far, and the caller must process the queue again (otherwise, no
 * more interrupts will happen).
 */
int
virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
{
	uint16_t	idx;

	idx = vq->vq_used_idx + nslots;

	/* set the new event index: avail_ring->used_event = idx */
	VQ_USED_EVENT(vq) = idx;
	virtio_membar_sync();

	vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;

	if (nslots < virtio_nused(vq))
		return 1;

	return 0;
}

/*
 * Postpone interrupt until 3/4 of the available descriptors have been
 * consumed.
 */
int
virtio_postpone_intr_smart(struct virtqueue *vq)
{
	uint16_t	nslots;

	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;

	return virtio_postpone_intr(vq, nslots);
}

/*
 * Postpone interrupt until all of the available descriptors have been
 * consumed.
 */
int
virtio_postpone_intr_far(struct virtqueue *vq)
{
	uint16_t	nslots;

	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);

	return virtio_postpone_intr(vq, nslots);
}
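
/*
 * Example (a sketch only): with VIRTIO_F_RING_EVENT_IDX negotiated, a
 * driver can mitigate completion interrupts from its vq_done handler by
 * postponing them instead of re-enabling them right away:
 *
 *	// after draining the used ring in a hypothetical foo_vq_done():
 *	if (virtio_postpone_intr_smart(vq)) {
 *		// the used ring already advanced past the new event index;
 *		// drain it once more so no completion is missed
 *	}
 */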


/*
 * Start/stop vq interrupt.  No guarantee.
 */
void
virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX)) {
		/*
		 * No way to disable the interrupt completely with
		 * RingEventIdx. Instead advance used_event by half
		 * the possible value. This won't happen soon and
		 * is far enough in the past to not trigger a spurious
		 * interrupt.
		 */
		VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
	} else {
		vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
	}
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;
}

int
virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	/*
	 * If event index feature is negotiated, enabling
	 * interrupts is done through setting the latest
	 * consumed index in the used_event field
	 */
	if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX))
		VQ_USED_EVENT(vq) = vq->vq_used_idx;
	else
		vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;

	virtio_membar_sync();

	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;

	if (vq->vq_used_idx != vq->vq_used->idx)
		return 1;

	return 0;
}

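/*
 * Example (a sketch only): new entries may land in the used ring while
 * interrupts are off, so a handler that used virtio_stop_vq_intr() should
 * re-enable interrupts and re-check before returning:
 *
 *	do {
 *		// drain the used ring with virtio_dequeue()/_commit()
 *	} while (virtio_start_vq_intr(vsc, vq));
 */
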
/*
 * Returns the number of slots in the used ring available to
 * be supplied to the avail ring.
 */
int
virtio_nused(struct virtqueue *vq)
{
	uint16_t	n;

	n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
	VIRTIO_ASSERT(n <= vq->vq_num);

	return n;
}

#if VIRTIO_DEBUG
void
virtio_vq_dump(struct virtqueue *vq)
{
	/* Common fields */
	printf(" + vq num: %d\n", vq->vq_num);
	printf(" + vq mask: 0x%X\n", vq->vq_mask);
	printf(" + vq index: %d\n", vq->vq_index);
	printf(" + vq used idx: %d\n", vq->vq_used_idx);
	printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
	printf(" + vq queued: %d\n", vq->vq_queued);
	/* Avail ring fields */
	printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
	printf(" + avail idx: %d\n", vq->vq_avail->idx);
	printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
	/* Used ring fields */
	printf(" + used flags: 0x%X\n", vq->vq_used->flags);
	printf(" + used idx: %d\n", vq->vq_used->idx);
	printf(" + used event: %d\n", VQ_USED_EVENT(vq));
	printf(" +++++++++++++++++++++++++++\n");
}
#endif