/*	$OpenBSD: virtio.c,v 1.37 2025/01/09 10:55:22 sf Exp $	*/
/*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/

/*
 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/atomic.h>
#include <sys/malloc.h>

#include <dev/pv/virtioreg.h>
#include <dev/pv/virtiovar.h>

#if VIRTIO_DEBUG
#define VIRTIO_ASSERT(x)	KASSERT(x)
#else
#define VIRTIO_ASSERT(x)
#endif

void		 virtio_init_vq(struct virtio_softc *,
				struct virtqueue *);
void		 vq_free_entry(struct virtqueue *, struct vq_entry *);
struct vq_entry	*vq_alloc_entry(struct virtqueue *);

struct cfdriver virtio_cd = {
	NULL, "virtio", DV_DULL
};

static const char * const virtio_device_name[] = {
	"Unknown (0)",		/* 0 */
	"Network",		/* 1 */
	"Block",		/* 2 */
	"Console",		/* 3 */
	"Entropy",		/* 4 */
	"Memory Balloon",	/* 5 */
	"IO Memory",		/* 6 */
	"Rpmsg",		/* 7 */
	"SCSI host",		/* 8 */
	"9P Transport",		/* 9 */
	"mac80211 wlan",	/* 10 */
	NULL,			/* 11 */
	NULL,			/* 12 */
	NULL,			/* 13 */
	NULL,			/* 14 */
	NULL,			/* 15 */
	"GPU",			/* 16 */
};
#define NDEVNAMES	(sizeof(virtio_device_name)/sizeof(char*))

const char *
virtio_device_string(int id)
{
	return id < NDEVNAMES ? virtio_device_name[id] : "Unknown";
}

#if VIRTIO_DEBUG
static const struct virtio_feature_name transport_feature_names[] = {
	{ VIRTIO_F_NOTIFY_ON_EMPTY,	"NotifyOnEmpty"},
	{ VIRTIO_F_ANY_LAYOUT,		"AnyLayout"},
	{ VIRTIO_F_RING_INDIRECT_DESC,	"RingIndirectDesc"},
	{ VIRTIO_F_RING_EVENT_IDX,	"RingEventIdx"},
	{ VIRTIO_F_BAD_FEATURE,		"BadFeature"},
	{ VIRTIO_F_VERSION_1,		"Version1"},
	{ VIRTIO_F_ACCESS_PLATFORM,	"AccessPlatf"},
	{ VIRTIO_F_RING_PACKED,		"RingPacked"},
	{ VIRTIO_F_IN_ORDER,		"InOrder"},
	{ VIRTIO_F_ORDER_PLATFORM,	"OrderPlatf"},
	{ VIRTIO_F_SR_IOV,		"SrIov"},
	{ VIRTIO_F_NOTIFICATION_DATA,	"NotifData"},
	{ VIRTIO_F_NOTIF_CONFIG_DATA,	"NotifConfData"},
	{ VIRTIO_F_RING_RESET,		"RingReset"},
	{ 0,				NULL}
};

void
virtio_log_features(uint64_t host, uint64_t neg,
    const struct virtio_feature_name *guest_feature_names)
{
	const struct virtio_feature_name *namep;
	int i;
	char c;
	uint64_t bit;

	for (i = 0; i < 64; i++) {
		if (i == 30) {
			/*
			 * VIRTIO_F_BAD_FEATURE is only used for
			 * checking correct negotiation
			 */
			continue;
		}
		bit = 1ULL << i;
		if ((host&bit) == 0)
			continue;
		namep = guest_feature_names;
		while (namep->bit && namep->bit != bit)
			namep++;
		if (namep->name == NULL) {
			namep = transport_feature_names;
			while (namep->bit && namep->bit != bit)
				namep++;
		}
		c = (neg&bit) ? '+' : '-';
		if (namep->name)
			printf(" %c%s", c, namep->name);
		else
			printf(" %cUnknown(%d)", c, i);
	}
}
#endif
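
/*
 * Example (sketch, only meaningful with VIRTIO_DEBUG): a device driver
 * passes its own device-specific feature name table to
 * virtio_log_features(). The table, feature bit and variables below are
 * hypothetical placeholders, not part of this file.
 *
 *	static const struct virtio_feature_name foo_feature_names[] = {
 *		{ VIRTIO_FOO_F_SOMETHING,	"Something" },
 *		{ 0,				NULL }
 *	};
 *
 *	// after feature negotiation, with the host and negotiated masks:
 *	virtio_log_features(host, negotiated, foo_feature_names);
 */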

/*
 * Reset the device.
 */
/*
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() can still be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, the features are assumed not to change again.
 */
void
virtio_reset(struct virtio_softc *sc)
{
	virtio_device_reset(sc);
	sc->sc_active_features = 0;
}

int
virtio_attach_finish(struct virtio_softc *sc, struct virtio_attach_args *va)
{
	int i, ret;

	ret = sc->sc_ops->attach_finish(sc, va);
	if (ret != 0)
		return ret;

	sc->sc_ops->setup_intrs(sc);
	for (i = 0; i < sc->sc_nvqs; i++) {
		struct virtqueue *vq = &sc->sc_vqs[i];

		if (vq->vq_num == 0)
			continue;
		virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
	}
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
	return 0;
}

void
virtio_reinit_start(struct virtio_softc *sc)
{
	int i;

	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
	virtio_negotiate_features(sc, NULL);
	sc->sc_ops->setup_intrs(sc);
	for (i = 0; i < sc->sc_nvqs; i++) {
		int n;
		struct virtqueue *vq = &sc->sc_vqs[i];
		if (vq->vq_num == 0)	/* not used */
			continue;
		n = virtio_read_queue_size(sc, vq->vq_index);
		if (n != vq->vq_num) {
			panic("%s: virtqueue size changed, vq index %d",
			    sc->sc_dev.dv_xname, vq->vq_index);
		}
		virtio_init_vq(sc, vq);
		virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
	}
}

void
virtio_reinit_end(struct virtio_softc *sc)
{
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
}
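
/*
 * Example (sketch): how a hypothetical driver could reinitialize its
 * device, e.g. on resume, following the sequence described above. The
 * names foo_softc, sc_virtio and foo_drain_vqs() are placeholders, not
 * part of this API.
 *
 *	struct virtio_softc *vsc = sc->sc_virtio;
 *
 *	virtio_reset(vsc);		// stop device activity
 *	foo_drain_vqs(sc);		// dequeue finished, revoke pending
 *	virtio_reinit_start(vsc);	// re-negotiate, re-init virtqueues
 *	// re-fill receive queues, reload device config, etc.
 *	virtio_reinit_end(vsc);		// DRIVER_OK; enqueue allowed again
 */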

/*
 * dmamap sync operations for a virtqueue.
 *
 * XXX These should be more fine grained. Syncing the whole ring if we
 * XXX only need a few bytes is inefficient if we use bounce buffers.
 */
static inline void
vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	/* availoffset == sizeof(vring_desc)*vq_num */
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
	    ops);
}

static inline void
vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_availoffset,
	    offsetof(struct vring_avail, ring) + vq->vq_num * sizeof(uint16_t),
	    ops);
}

static inline void
vq_sync_aring_used_event(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_availoffset +
	    offsetof(struct vring_avail, ring) + vq->vq_num * sizeof(uint16_t),
	    sizeof(uint16_t), ops);
}

static inline void
vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_usedoffset,
	    offsetof(struct vring_used, ring) + vq->vq_num *
	    sizeof(struct vring_used_elem), ops);
}

static inline void
vq_sync_uring_avail_event(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
	    vq->vq_usedoffset + offsetof(struct vring_used, ring) +
	    vq->vq_num * sizeof(struct vring_used_elem), sizeof(uint16_t),
	    ops);
}

static inline void
vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
    int ops)
{
	int offset = vq->vq_indirectoffset +
	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;

	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, offset,
	    sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
}

/*
 * Scan the vqs, do bus_dmamap_sync on the rings (not on the payload),
 * and call (*vq_done)() if some entries have been consumed.
 * For use in transport-specific interrupt handlers.
 */
int
virtio_check_vqs(struct virtio_softc *sc)
{
	int i, r = 0;

	/* going backwards is better for if_vio */
	for (i = sc->sc_nvqs - 1; i >= 0; i--) {
		if (sc->sc_vqs[i].vq_num == 0)	/* not used */
			continue;
		r |= virtio_check_vq(sc, &sc->sc_vqs[i]);
	}

	return r;
}

int
virtio_check_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	if (vq->vq_queued) {
		vq->vq_queued = 0;
		vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
	}
	vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
	if (vq->vq_used_idx != vq->vq_used->idx) {
		if (vq->vq_done)
			return (vq->vq_done)(vq);
	}

	return 0;
}
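
/*
 * Example (sketch): a transport interrupt handler typically just
 * dispatches via virtio_check_vqs()/virtio_check_vq(), which do the
 * required ring syncs and invoke the drivers' vq_done callbacks. The
 * handler name below is hypothetical.
 *
 *	int
 *	foo_transport_intr(void *arg)
 *	{
 *		struct virtio_softc *sc = arg;
 *
 *		// nonzero if any vq_done callback reported work
 *		return virtio_check_vqs(sc);
 *	}
 */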

/*
 * Initialize vq structure.
 */
void
virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	int i, j;
	int vq_size = vq->vq_num;

	VIRTIO_ASSERT(vq_size > 0);
	memset(vq->vq_vaddr, 0, vq->vq_bytesize);

	/* build the indirect descriptor chain */
	if (vq->vq_indirect != NULL) {
		struct vring_desc *vd;

		for (i = 0; i < vq_size; i++) {
			vd = vq->vq_indirect;
			vd += vq->vq_maxnsegs * i;
			for (j = 0; j < vq->vq_maxnsegs-1; j++)
				vd[j].next = j + 1;
		}
	}

	/* free slot management */
	SLIST_INIT(&vq->vq_freelist);
	/*
	 * virtio_enqueue_trim needs monotonically increasing entries,
	 * therefore initialize in reverse order.
	 */
	for (i = vq_size - 1; i >= 0; i--) {
		SLIST_INSERT_HEAD(&vq->vq_freelist, &vq->vq_entries[i],
		    qe_list);
		vq->vq_entries[i].qe_index = i;
	}

	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_bytesize,
	    BUS_DMASYNC_PREWRITE);
	/* enqueue/dequeue status */
	vq->vq_avail_idx = 0;
	vq->vq_used_idx = 0;
	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
	vq->vq_queued = 1;
}

/*
 * Allocate/free a vq.
 *
 * maxnsegs denotes how much space should be allocated for indirect
 * descriptors. maxnsegs == 1 can be used to disable the use of indirect
 * descriptors for this queue.
 */
int
virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
    int maxnsegs, const char *name)
{
	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
	int rsegs, r, hdrlen;
#define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
				 ~(VIRTIO_PAGE_SIZE-1))

	memset(vq, 0, sizeof(*vq));

	vq_size = virtio_read_queue_size(sc, index);
	if (vq_size == 0) {
		printf("virtqueue does not exist, index %d for %s\n", index,
		    name);
		goto err;
	}
	if (((vq_size - 1) & vq_size) != 0)
		panic("vq_size not power of two: %d", vq_size);

	hdrlen = virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX) ? 3 : 2;

	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
	    + sizeof(uint16_t) * (hdrlen + vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
	    + sizeof(struct vring_used_elem) * vq_size);
	/* allocsize3: indirect table */
	if (sc->sc_indirect && maxnsegs > 1)
		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
	else
		allocsize3 = 0;
	allocsize = allocsize1 + allocsize2 + allocsize3;

	/* alloc and map the memory */
	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s allocation failed, error %d\n",
		       index, name, r);
		goto err;
	}
	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
	    (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s map failed, error %d\n", index,
		    name, r);
		goto err;
	}
	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
	    BUS_DMA_NOWAIT, &vq->vq_dmamap);
	if (r != 0) {
		printf("virtqueue %d for %s dmamap creation failed, "
		    "error %d\n", index, name, r);
		goto err;
	}
	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap, vq->vq_vaddr,
	    allocsize, NULL, BUS_DMA_NOWAIT);
	if (r != 0) {
		printf("virtqueue %d for %s dmamap load failed, error %d\n",
		    index, name, r);
		goto err;
	}

	/* remember addresses and offsets for later use */
	vq->vq_owner = sc;
	vq->vq_num = vq_size;
	vq->vq_mask = vq_size - 1;
	vq->vq_index = index;
	vq->vq_desc = vq->vq_vaddr;
	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
	vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) +
	    vq->vq_availoffset);
	vq->vq_usedoffset = allocsize1;
	vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) +
	    vq->vq_usedoffset);
	if (allocsize3 > 0) {
		vq->vq_indirectoffset = allocsize1 + allocsize2;
		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
		    + vq->vq_indirectoffset);
	}
	vq->vq_bytesize = allocsize;
	vq->vq_maxnsegs = maxnsegs;

	/* free slot management */
	vq->vq_entries = mallocarray(vq_size, sizeof(struct vq_entry),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq->vq_entries == NULL) {
		r = ENOMEM;
		goto err;
	}

	virtio_init_vq(sc, vq);

#if VIRTIO_DEBUG
	printf("\nallocated %u bytes for virtqueue %d for %s, size %d\n",
	    allocsize, index, name, vq_size);
	if (allocsize3 > 0)
		printf("using %d bytes (%d entries) of indirect descriptors\n",
		    allocsize3, maxnsegs * vq_size);
#endif
	return 0;

err:
	if (vq->vq_dmamap)
		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	if (vq->vq_vaddr)
		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
	if (vq->vq_segs[0].ds_addr)
		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return -1;
}

int
virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	struct vq_entry *qe;
	int i = 0;

	if (vq->vq_num == 0) {
		/* virtio_alloc_vq() was never called */
		return 0;
	}

	/* device must be already deactivated */
	/* confirm the vq is empty */
	SLIST_FOREACH(qe, &vq->vq_freelist, qe_list) {
		i++;
	}
	if (i != vq->vq_num) {
		printf("%s: freeing non-empty vq, index %d\n",
		    sc->sc_dev.dv_xname, vq->vq_index);
		return EBUSY;
	}

	/* tell device that there's no virtqueue any longer */
	virtio_setup_queue(sc, vq, 0);

	free(vq->vq_entries, M_DEVBUF, 0);
	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return 0;
}
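
/*
 * Example (sketch): a driver allocates its virtqueues during attach and
 * frees them on detach or on attach failure. The queue parameters and
 * names (sc_vq, FOO_MAX_SEGS, foo_rx_done) are hypothetical. Note that
 * vq_done must be set after virtio_alloc_vq(), which zeroes the struct.
 *
 *	if (virtio_alloc_vq(vsc, &sc->sc_vq[0], 0, FOO_MAX_SEGS,
 *	    "foo rx") != 0)
 *		goto fail;
 *	sc->sc_vq[0].vq_done = foo_rx_done;
 *	...
 *	virtio_free_vq(vsc, &sc->sc_vq[0]);	// on detach
 */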

/*
 * Free descriptor management.
 */
struct vq_entry *
vq_alloc_entry(struct virtqueue *vq)
{
	struct vq_entry *qe;

	if (SLIST_EMPTY(&vq->vq_freelist))
		return NULL;
	qe = SLIST_FIRST(&vq->vq_freelist);
	SLIST_REMOVE_HEAD(&vq->vq_freelist, qe_list);

	return qe;
}

void
vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
{
	SLIST_INSERT_HEAD(&vq->vq_freelist, qe, qe_list);
}

/*
 * Enqueue several dmamaps as a single request.
 */
/*
 * Typical usage:
 *  <queue size> instances of each of the following are stored in arrays
 *  - command blocks (in dmamem) should be pre-allocated and mapped
 *  - dmamaps for command blocks should be pre-allocated and loaded
 *  - dmamaps for payload should be pre-allocated
 *	r = virtio_enqueue_prep(vq, &slot);		// allocate a slot
 *	if (r)		// currently 0 or EAGAIN
 *	  return r;
 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
 *	if (r) {
 *	  virtio_enqueue_abort(vq, slot);
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;
 *	}
 *	r = virtio_enqueue_reserve(vq, slot,
 *				   dmamap_payload[slot]->dm_nsegs+1);
 *							// ^ +1 for command
 *	if (r) {	// currently 0 or EAGAIN
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;					// do not call abort()
 *	}
 *	<setup and prepare commands>
 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
 *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
 *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(sc, vq, slot, 1);
 *
 * Alternative usage with statically allocated slots:
 *	<during initialization>
 *	// while not out of slots, do
 *	virtio_enqueue_prep(vq, &slot);			// allocate a slot
 *	virtio_enqueue_reserve(vq, slot, max_segs);	// reserve all slots
 *						that may ever be needed
 *
 *	<when enqueuing a request>
 *	// Don't call virtio_enqueue_prep()
 *	bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
 *	virtio_enqueue_trim(vq, slot, num_segs_needed);
 *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
 *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(sc, vq, slot, 1);
 *
 *	<when dequeuing>
 *	// don't call virtio_dequeue_commit()
 */
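
/*
 * Example (sketch): a concrete instance of the "typical usage" above,
 * enqueueing one request made of a pre-loaded command dmamap plus a
 * payload dmamap. dmamap_cmd, dmamap_payload, buf, len and iswrite are
 * hypothetical driver-side names.
 *
 *	if (virtio_enqueue_prep(vq, &slot) != 0)
 *		return EAGAIN;
 *	if (bus_dmamap_load(vsc->sc_dmat, dmamap_payload[slot], buf, len,
 *	    NULL, BUS_DMA_NOWAIT) != 0) {
 *		virtio_enqueue_abort(vq, slot);
 *		return ENOMEM;
 *	}
 *	if (virtio_enqueue_reserve(vq, slot,
 *	    dmamap_payload[slot]->dm_nsegs + 1) != 0) {
 *		bus_dmamap_unload(vsc->sc_dmat, dmamap_payload[slot]);
 *		return EAGAIN;
 *	}
 *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
 *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(vsc, vq, slot, 1);
 */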

/*
 * enqueue_prep: allocate a slot number
 */
int
virtio_enqueue_prep(struct virtqueue *vq, int *slotp)
{
	struct vq_entry *qe1;

	VIRTIO_ASSERT(slotp != NULL);

	qe1 = vq_alloc_entry(vq);
	if (qe1 == NULL)
		return EAGAIN;
	/* next slot is not allocated yet */
	qe1->qe_next = -1;
	*slotp = qe1->qe_index;

	return 0;
}

/*
 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
 * Calls virtio_enqueue_abort() on failure.
 */
int
virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];

	VIRTIO_ASSERT(qe1->qe_next == -1);
	VIRTIO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num);

	if (vq->vq_indirect != NULL && nsegs > 1 && nsegs <= vq->vq_maxnsegs) {
		struct vring_desc *vd;
		int i;

		qe1->qe_indirect = 1;

		vd = &vq->vq_desc[qe1->qe_index];
		vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr +
		    vq->vq_indirectoffset;
		vd->addr += sizeof(struct vring_desc) * vq->vq_maxnsegs *
		    qe1->qe_index;
		vd->len = sizeof(struct vring_desc) * nsegs;
		vd->flags = VRING_DESC_F_INDIRECT;

		vd = vq->vq_indirect;
		vd += vq->vq_maxnsegs * qe1->qe_index;
		qe1->qe_desc_base = vd;

		for (i = 0; i < nsegs-1; i++)
			vd[i].flags = VRING_DESC_F_NEXT;
		vd[i].flags = 0;
		qe1->qe_next = 0;

		return 0;
	} else {
		struct vring_desc *vd;
		struct vq_entry *qe;
		int i, s;

		qe1->qe_indirect = 0;

		vd = &vq->vq_desc[0];
		qe1->qe_desc_base = vd;
		qe1->qe_next = qe1->qe_index;
		s = slot;
		for (i = 0; i < nsegs - 1; i++) {
			qe = vq_alloc_entry(vq);
			if (qe == NULL) {
				vd[s].flags = 0;
				virtio_enqueue_abort(vq, slot);
				return EAGAIN;
			}
			vd[s].flags = VRING_DESC_F_NEXT;
			vd[s].next = qe->qe_index;
			s = qe->qe_index;
		}
		vd[s].flags = 0;

		return 0;
	}
}

/*
 * enqueue: enqueue a single dmamap.
 */
int
virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int i;
	int s = qe1->qe_next;

	VIRTIO_ASSERT(s >= 0);
	VIRTIO_ASSERT(dmamap->dm_nsegs > 0);
	if (dmamap->dm_nsegs > vq->vq_maxnsegs) {
#if VIRTIO_DEBUG
		for (i = 0; i < dmamap->dm_nsegs; i++) {
			printf(" %d (%d): %p %lx \n", i, write,
			    (void *)dmamap->dm_segs[i].ds_addr,
			    dmamap->dm_segs[i].ds_len);
		}
#endif
		panic("dmamap->dm_nseg %d > vq->vq_maxnsegs %d",
		    dmamap->dm_nsegs, vq->vq_maxnsegs);
	}

	for (i = 0; i < dmamap->dm_nsegs; i++) {
		vd[s].addr = dmamap->dm_segs[i].ds_addr;
		vd[s].len = dmamap->dm_segs[i].ds_len;
		if (!write)
			vd[s].flags |= VRING_DESC_F_WRITE;
		s = vd[s].next;
	}
	qe1->qe_next = s;

	return 0;
}

int
virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap,
    bus_addr_t start, bus_size_t len, int write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int s = qe1->qe_next;

	VIRTIO_ASSERT(s >= 0);
	/* XXX todo: handle more segments */
	VIRTIO_ASSERT(dmamap->dm_nsegs == 1);
	VIRTIO_ASSERT((dmamap->dm_segs[0].ds_len > start) &&
	    (dmamap->dm_segs[0].ds_len >= start + len));

	vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
	vd[s].len = len;
	if (!write)
		vd[s].flags |= VRING_DESC_F_WRITE;
	qe1->qe_next = vd[s].next;

	return 0;
}

static void
publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq)
{
	/* first make sure the avail ring entries are visible to the device */
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);

	virtio_membar_producer();
	vq->vq_avail->idx = vq->vq_avail_idx;
	/* make the avail idx visible to the device */
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued = 1;
}
/*
 * enqueue_commit: add the slot to the avail ring and optionally notify
 * the device.
 */
void
virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
    int notifynow)
{
	struct vq_entry *qe1;

	if (slot < 0)
		goto notify;
	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
	qe1 = &vq->vq_entries[slot];
	if (qe1->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
	vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;

notify:
	if (notifynow) {
		if (virtio_has_feature(vq->vq_owner, VIRTIO_F_RING_EVENT_IDX)) {
			uint16_t o = vq->vq_avail->idx;
			uint16_t n = vq->vq_avail_idx;
			uint16_t t;
			publish_avail_idx(sc, vq);

			virtio_membar_sync();
			vq_sync_uring_avail_event(sc, vq, BUS_DMASYNC_POSTREAD);
			t = VQ_AVAIL_EVENT(vq) + 1;
			if ((uint16_t)(n - t) < (uint16_t)(n - o))
				sc->sc_ops->kick(sc, vq->vq_index);
		} else {
			publish_avail_idx(sc, vq);

			virtio_membar_sync();
			vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
			if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
				sc->sc_ops->kick(sc, vq->vq_index);
		}
	}
}

/*
 * enqueue_abort: rollback.
 */
int
virtio_enqueue_abort(struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd;
	int s;

	if (qe->qe_next < 0) {
		vq_free_entry(vq, qe);
		return 0;
	}

	s = slot;
	vd = &vq->vq_desc[0];
	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry(vq, qe);
	return 0;
}

/*
 * enqueue_trim: adjust buffer size to given # of segments, a.k.a.
 * descriptors.
 */
void
virtio_enqueue_trim(struct virtqueue *vq, int slot, int nsegs)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = &vq->vq_desc[0];
	int i;

	if ((vd[slot].flags & VRING_DESC_F_INDIRECT) == 0) {
		qe1->qe_next = qe1->qe_index;
		/*
		 * N.B.: the vq_entries are ASSUMED to be a contiguous
		 *       block with slot being the index to the first one.
		 */
	} else {
		qe1->qe_next = 0;
		vd = &vq->vq_desc[qe1->qe_index];
		vd->len = sizeof(struct vring_desc) * nsegs;
		vd = qe1->qe_desc_base;
		slot = 0;
	}

	for (i = 0; i < nsegs - 1; i++) {
		vd[slot].flags = VRING_DESC_F_NEXT;
		slot++;
	}
	vd[slot].flags = 0;
}

/*
 * Dequeue a request.
 */
/*
 * dequeue: dequeue a request from uring; bus_dmamap_sync for uring must
 *	    already have been done, usually by virtio_check_vq()
 *	    in the interrupt handler. This means that polling virtio_dequeue()
 *	    repeatedly until it returns 0 does not work.
 */
int
virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
    int *slotp, int *lenp)
{
	uint16_t slot, usedidx;
	struct vq_entry *qe;

	if (vq->vq_used_idx == vq->vq_used->idx)
		return ENOENT;
	usedidx = vq->vq_used_idx++;
	usedidx &= vq->vq_mask;

	virtio_membar_consumer();
	vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
	slot = vq->vq_used->ring[usedidx].id;
	qe = &vq->vq_entries[slot];

	if (qe->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);

	if (slotp)
		*slotp = slot;
	if (lenp)
		*lenp = vq->vq_used->ring[usedidx].len;

	return 0;
}

/*
 * dequeue_commit: complete the dequeue; the slot is recycled for future use.
 *                 If you forget to call this, the slot will be leaked.
 *
 *                 Don't call this if you use statically allocated slots
 *                 and virtio_enqueue_trim().
 *
 *                 Returns the number of freed slots.
 */
int
virtio_dequeue_commit(struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd = &vq->vq_desc[0];
	int s = slot, r = 1;

	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry(vq, qe);
		qe = &vq->vq_entries[s];
		r++;
	}
	vq_free_entry(vq, qe);

	return r;
}
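
/*
 * Example (sketch): a vq_done callback usually drains the used ring with
 * virtio_dequeue()/virtio_dequeue_commit(). foo_vq_done and
 * dmamap_payload are hypothetical driver-side names.
 *
 *	int
 *	foo_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		int slot, len, r = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			bus_dmamap_sync(vsc->sc_dmat, dmamap_payload[slot],
 *			    0, len, BUS_DMASYNC_POSTREAD);
 *			// process the completed request ...
 *			virtio_dequeue_commit(vq, slot);
 *			r = 1;
 *		}
 *		return r;
 *	}
 */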

/*
 * Increase the event index in order to delay interrupts.
 * Returns 0 on success; returns 1 if the used ring has already advanced
 * too far, and the caller must process the queue again (otherwise, no
 * more interrupts will happen).
 */
int
virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
{
	uint16_t	idx;

	idx = vq->vq_used_idx + nslots;

	/* set the new event index: avail_ring->used_event = idx */
	VQ_USED_EVENT(vq) = idx;
	virtio_membar_sync();

	vq_sync_aring_used_event(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;

	if (nslots < virtio_nused(vq))
		return 1;

	return 0;
}

/*
 * Postpone interrupt until 3/4 of the available descriptors have been
 * consumed.
 */
int
virtio_postpone_intr_smart(struct virtqueue *vq)
{
	uint16_t	nslots;

	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;

	return virtio_postpone_intr(vq, nslots);
}

/*
 * Postpone interrupt until all of the available descriptors have been
 * consumed.
 */
int
virtio_postpone_intr_far(struct virtqueue *vq)
{
	uint16_t	nslots;

	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);

	return virtio_postpone_intr(vq, nslots);
}
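
/*
 * Example (sketch): instead of re-enabling interrupts right away, a done
 * handler can postpone the next interrupt; if the used ring has already
 * advanced past the new event index, the queue must be processed again:
 *
 *	if (virtio_postpone_intr_smart(vq))
 *		goto again;	// more entries already used; process them
 */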

/*
 * Start/stop vq interrupt.  No guarantee.
 */
void
virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX)) {
		/*
		 * No way to disable the interrupt completely with
		 * RingEventIdx. Instead advance used_event by half
		 * the possible value. This won't happen soon and
		 * is far enough in the past to not trigger a spurious
		 * interrupt.
		 */
		VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
		vq_sync_aring_used_event(sc, vq, BUS_DMASYNC_PREWRITE);
	} else {
		vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
	}
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;
}

int
virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	/*
	 * If the event index feature has been negotiated, interrupts
	 * are enabled by writing the latest consumed index into the
	 * used_event field.
	 */
	if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX)) {
		VQ_USED_EVENT(vq) = vq->vq_used_idx;
		vq_sync_aring_used_event(sc, vq, BUS_DMASYNC_PREWRITE);
	} else {
		vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
		vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	}

	virtio_membar_sync();

	vq->vq_queued++;

	vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
	if (vq->vq_used_idx != vq->vq_used->idx)
		return 1;

	return 0;
}
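
/*
 * Example (sketch): the usual pattern in a done handler is to keep vq
 * interrupts disabled while polling and to re-check after re-enabling
 * them, since virtio_start_vq_intr() reports whether new used entries
 * arrived in the meantime:
 *
 *	virtio_stop_vq_intr(vsc, vq);
 *	// ... process used entries ...
 *	if (virtio_start_vq_intr(vsc, vq))
 *		goto again;	// more work arrived while interrupts were off
 */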

/*
 * Returns the number of slots in the used ring that have not been
 * dequeued yet, i.e. slots that can be reclaimed and supplied to the
 * avail ring again.
 */
int
virtio_nused(struct virtqueue *vq)
{
	uint16_t	n;

	vq_sync_uring(vq->vq_owner, vq, BUS_DMASYNC_POSTREAD);
	n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
	VIRTIO_ASSERT(n <= vq->vq_num);

	return n;
}

#if VIRTIO_DEBUG
void
virtio_vq_dump(struct virtqueue *vq)
{
#if VIRTIO_DEBUG >= 2
	int i;
#endif
	/* Common fields */
	printf(" + addr: %p\n", vq);
	if (vq->vq_num == 0) {
		printf(" + vq is unused\n");
		return;
	}
	printf(" + vq num: %d\n", vq->vq_num);
	printf(" + vq mask: 0x%X\n", vq->vq_mask);
	printf(" + vq index: %d\n", vq->vq_index);
	printf(" + vq used idx: %d\n", vq->vq_used_idx);
	printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
	printf(" + vq queued: %d\n", vq->vq_queued);
#if VIRTIO_DEBUG >= 2
	for (i = 0; i < vq->vq_num; i++) {
		struct vring_desc *desc = &vq->vq_desc[i];
		printf("  D%-3d len:%d flags:%d next:%d\n", i, desc->len,
		    desc->flags, desc->next);
	}
#endif
	/* Avail ring fields */
	printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
	printf(" + avail idx: %d\n", vq->vq_avail->idx);
	printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
#if VIRTIO_DEBUG >= 2
	for (i = 0; i < vq->vq_num; i++)
		printf("  A%-3d idx:%d\n", i, vq->vq_avail->ring[i]);
#endif
	/* Used ring fields */
	printf(" + used flags: 0x%X\n", vq->vq_used->flags);
	printf(" + used idx: %d\n", vq->vq_used->idx);
	printf(" + used event: %d\n", VQ_USED_EVENT(vq));
#if VIRTIO_DEBUG >= 2
	for (i = 0; i < vq->vq_num; i++) {
		printf("  U%-3d id:%d len:%d\n", i,
				vq->vq_used->ring[i].id,
				vq->vq_used->ring[i].len);
	}
#endif
	printf(" +++++++++++++++++++++++++++\n");
}
#endif