xref: /qemu/hw/virtio/virtio.c (revision 33848cee)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "cpu.h"
18 #include "trace.h"
19 #include "exec/address-spaces.h"
20 #include "qemu/error-report.h"
21 #include "hw/virtio/virtio.h"
22 #include "qemu/atomic.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "migration/migration.h"
25 #include "hw/virtio/virtio-access.h"
26 #include "sysemu/dma.h"
27 
28 /*
29  * The alignment to use between consumer and producer parts of vring.
30  * The x86 page size. This is the default, used by transports like PCI
31  * which don't provide a means for the guest to tell the host the alignment.
32  */
33 #define VIRTIO_PCI_VRING_ALIGN         4096
34 
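/*
 * The structures below mirror the guest-visible layout of a legacy ("split")
 * virtqueue: a descriptor table, an "avail" ring written by the driver and a
 * "used" ring written by the device.  They live in guest memory, so they are
 * accessed with the virtio_*_phys and address_space helpers further down
 * rather than being dereferenced directly.
 */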
35 typedef struct VRingDesc
36 {
37     uint64_t addr;
38     uint32_t len;
39     uint16_t flags;
40     uint16_t next;
41 } VRingDesc;
42 
43 typedef struct VRingAvail
44 {
45     uint16_t flags;
46     uint16_t idx;
47     uint16_t ring[0];
48 } VRingAvail;
49 
50 typedef struct VRingUsedElem
51 {
52     uint32_t id;
53     uint32_t len;
54 } VRingUsedElem;
55 
56 typedef struct VRingUsed
57 {
58     uint16_t flags;
59     uint16_t idx;
60     VRingUsedElem ring[0];
61 } VRingUsed;
62 
63 typedef struct VRing
64 {
65     unsigned int num;
66     unsigned int num_default;
67     unsigned int align;
68     hwaddr desc;
69     hwaddr avail;
70     hwaddr used;
71 } VRing;
72 
73 struct VirtQueue
74 {
75     VRing vring;
76 
77     /* Next head to pop */
78     uint16_t last_avail_idx;
79 
80     /* Last avail_idx read from VQ. */
81     uint16_t shadow_avail_idx;
82 
83     uint16_t used_idx;
84 
85     /* Last used index value we have signalled on */
86     uint16_t signalled_used;
87 
88     /* Whether signalled_used is valid */
89     bool signalled_used_valid;
90 
91     /* Nested guest->host notification disabled counter */
92     unsigned int notification_disabled;
93 
94     uint16_t queue_index;
95 
96     int inuse;
97 
98     uint16_t vector;
99     VirtIOHandleOutput handle_output;
100     VirtIOHandleOutput handle_aio_output;
101     VirtIODevice *vdev;
102     EventNotifier guest_notifier;
103     EventNotifier host_notifier;
104     QLIST_ENTRY(VirtQueue) node;
105 };
106 
107 /* virt queue functions */
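/*
 * Recompute the avail and used ring addresses from the descriptor table
 * address.  Legacy transports only let the guest program the descriptor
 * table base; the other two rings follow it at spec-defined offsets, with
 * the used ring aligned up to vring.align.
 */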
108 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
109 {
110     VRing *vring = &vdev->vq[n].vring;
111 
112     if (!vring->desc) {
113         /* not yet setup -> nothing to do */
114         return;
115     }
116     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
117     vring->used = vring_align(vring->avail +
118                               offsetof(VRingAvail, ring[vring->num]),
119                               vring->align);
120 }
121 
122 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
123                             hwaddr desc_pa, int i)
124 {
125     address_space_read(vdev->dma_as, desc_pa + i * sizeof(VRingDesc),
126                        MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
127     virtio_tswap64s(vdev, &desc->addr);
128     virtio_tswap32s(vdev, &desc->len);
129     virtio_tswap16s(vdev, &desc->flags);
130     virtio_tswap16s(vdev, &desc->next);
131 }
132 
133 static inline uint16_t vring_avail_flags(VirtQueue *vq)
134 {
135     hwaddr pa;
136     pa = vq->vring.avail + offsetof(VRingAvail, flags);
137     return virtio_lduw_phys(vq->vdev, pa);
138 }
139 
140 static inline uint16_t vring_avail_idx(VirtQueue *vq)
141 {
142     hwaddr pa;
143     pa = vq->vring.avail + offsetof(VRingAvail, idx);
144     vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
145     return vq->shadow_avail_idx;
146 }
147 
148 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
149 {
150     hwaddr pa;
151     pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
152     return virtio_lduw_phys(vq->vdev, pa);
153 }
154 
155 static inline uint16_t vring_get_used_event(VirtQueue *vq)
156 {
157     return vring_avail_ring(vq, vq->vring.num);
158 }
159 
160 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
161                                     int i)
162 {
163     hwaddr pa;
164     virtio_tswap32s(vq->vdev, &uelem->id);
165     virtio_tswap32s(vq->vdev, &uelem->len);
166     pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
167     address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
168                        (void *)uelem, sizeof(VRingUsedElem));
169 }
170 
171 static uint16_t vring_used_idx(VirtQueue *vq)
172 {
173     hwaddr pa;
174     pa = vq->vring.used + offsetof(VRingUsed, idx);
175     return virtio_lduw_phys(vq->vdev, pa);
176 }
177 
178 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
179 {
180     hwaddr pa;
181     pa = vq->vring.used + offsetof(VRingUsed, idx);
182     virtio_stw_phys(vq->vdev, pa, val);
183     vq->used_idx = val;
184 }
185 
186 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
187 {
188     VirtIODevice *vdev = vq->vdev;
189     hwaddr pa;
190     pa = vq->vring.used + offsetof(VRingUsed, flags);
191     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
192 }
193 
194 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
195 {
196     VirtIODevice *vdev = vq->vdev;
197     hwaddr pa;
198     pa = vq->vring.used + offsetof(VRingUsed, flags);
199     virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
200 }
201 
202 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
203 {
204     hwaddr pa;
205     if (vq->notification_disabled) {
206         return;
207     }
208     pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
209     virtio_stw_phys(vq->vdev, pa, val);
210 }
211 
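/*
 * Enable or disable guest->host (driver to device) notifications for this
 * queue.  Calls nest: notification_disabled counts outstanding disables, and
 * the assert catches unbalanced enables.  When VIRTIO_RING_F_EVENT_IDX was
 * negotiated, the avail event index is updated instead of toggling
 * VRING_USED_F_NO_NOTIFY.
 */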
212 void virtio_queue_set_notification(VirtQueue *vq, int enable)
213 {
214     if (enable) {
215         assert(vq->notification_disabled > 0);
216         vq->notification_disabled--;
217     } else {
218         vq->notification_disabled++;
219     }
220 
221     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
222         vring_set_avail_event(vq, vring_avail_idx(vq));
223     } else if (enable) {
224         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
225     } else {
226         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
227     }
228     if (enable) {
229         /* Expose avail event/used flags before caller checks the avail idx. */
230         smp_mb();
231     }
232 }
233 
234 int virtio_queue_ready(VirtQueue *vq)
235 {
236     return vq->vring.avail != 0;
237 }
238 
239 /* Fetch avail_idx from VQ memory only when we really need to know if
240  * guest has added some buffers. */
241 int virtio_queue_empty(VirtQueue *vq)
242 {
243     if (vq->shadow_avail_idx != vq->last_avail_idx) {
244         return 0;
245     }
246 
247     return vring_avail_idx(vq) == vq->last_avail_idx;
248 }
249 
250 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
251                                unsigned int len)
252 {
253     AddressSpace *dma_as = vq->vdev->dma_as;
254     unsigned int offset;
255     int i;
256 
257     offset = 0;
258     for (i = 0; i < elem->in_num; i++) {
259         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
260 
261         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
262                          elem->in_sg[i].iov_len,
263                          DMA_DIRECTION_FROM_DEVICE, size);
264 
265         offset += size;
266     }
267 
268     for (i = 0; i < elem->out_num; i++)
269         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
270                          elem->out_sg[i].iov_len,
271                          DMA_DIRECTION_TO_DEVICE,
272                          elem->out_sg[i].iov_len);
273 }
274 
275 /* virtqueue_detach_element:
276  * @vq: The #VirtQueue
277  * @elem: The #VirtQueueElement
278  * @len: number of bytes written
279  *
280  * Detach the element from the virtqueue.  This function is suitable for device
281  * reset or other situations where a #VirtQueueElement is simply freed and will
282  * not be pushed or discarded.
283  */
284 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
285                               unsigned int len)
286 {
287     vq->inuse--;
288     virtqueue_unmap_sg(vq, elem, len);
289 }
290 
291 /* virtqueue_unpop:
292  * @vq: The #VirtQueue
293  * @elem: The #VirtQueueElement
294  * @len: number of bytes written
295  *
296  * Pretend the most recent element wasn't popped from the virtqueue.  The next
297  * call to virtqueue_pop() will refetch the element.
298  */
299 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
300                      unsigned int len)
301 {
302     vq->last_avail_idx--;
303     virtqueue_detach_element(vq, elem, len);
304 }
305 
306 /* virtqueue_rewind:
307  * @vq: The #VirtQueue
308  * @num: Number of elements to push back
309  *
310  * Pretend that elements weren't popped from the virtqueue.  The next
311  * virtqueue_pop() will refetch the oldest element.
312  *
313  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
314  *
315  * Returns: true on success, false if @num is greater than the number of in use
316  * elements.
317  */
318 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
319 {
320     if (num > vq->inuse) {
321         return false;
322     }
323     vq->last_avail_idx -= num;
324     vq->inuse -= num;
325     return true;
326 }
327 
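/*
 * Completion happens in two steps: virtqueue_fill() writes a used-ring entry
 * for an element at offset idx past the current used index, and
 * virtqueue_flush() publishes count filled entries by advancing the used
 * index after a write barrier.  virtqueue_push() below is the common
 * fill + flush of a single element.
 */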
328 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
329                     unsigned int len, unsigned int idx)
330 {
331     VRingUsedElem uelem;
332 
333     trace_virtqueue_fill(vq, elem, len, idx);
334 
335     virtqueue_unmap_sg(vq, elem, len);
336 
337     if (unlikely(vq->vdev->broken)) {
338         return;
339     }
340 
341     idx = (idx + vq->used_idx) % vq->vring.num;
342 
343     uelem.id = elem->index;
344     uelem.len = len;
345     vring_used_write(vq, &uelem, idx);
346 }
347 
348 void virtqueue_flush(VirtQueue *vq, unsigned int count)
349 {
350     uint16_t old, new;
351 
352     if (unlikely(vq->vdev->broken)) {
353         vq->inuse -= count;
354         return;
355     }
356 
357     /* Make sure buffer is written before we update index. */
358     smp_wmb();
359     trace_virtqueue_flush(vq, count);
360     old = vq->used_idx;
361     new = old + count;
362     vring_used_idx_set(vq, new);
363     vq->inuse -= count;
364     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
365         vq->signalled_used_valid = false;
366 }
367 
368 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
369                     unsigned int len)
370 {
371     virtqueue_fill(vq, elem, len, 0);
372     virtqueue_flush(vq, 1);
373 }
374 
375 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
376 {
377     uint16_t num_heads = vring_avail_idx(vq) - idx;
378 
379     /* Check it isn't doing very strange things with descriptor numbers. */
380     if (num_heads > vq->vring.num) {
381         virtio_error(vq->vdev, "Guest moved avail index from %u to %u",
382                      idx, vq->shadow_avail_idx);
383         return -EINVAL;
384     }
385     /* On success, callers read a descriptor at vq->last_avail_idx.
386      * Make sure descriptor read does not bypass avail index read. */
387     if (num_heads) {
388         smp_rmb();
389     }
390 
391     return num_heads;
392 }
393 
394 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
395                                unsigned int *head)
396 {
397     /* Grab the next descriptor number the guest is advertising; the caller
398      * advances the index it has seen. */
399     *head = vring_avail_ring(vq, idx % vq->vring.num);
400 
401     /* If their number is silly, that's a fatal mistake. */
402     if (*head >= vq->vring.num) {
403         virtio_error(vq->vdev, "Guest says index %u is available", *head);
404         return false;
405     }
406 
407     return true;
408 }
409 
410 enum {
411     VIRTQUEUE_READ_DESC_ERROR = -1,
412     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
413     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
414 };
415 
416 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
417                                     hwaddr desc_pa, unsigned int max,
418                                     unsigned int *next)
419 {
420     /* If this descriptor says it doesn't chain, we're done. */
421     if (!(desc->flags & VRING_DESC_F_NEXT)) {
422         return VIRTQUEUE_READ_DESC_DONE;
423     }
424 
425     /* Check they're not leading us off the end of the descriptor table. */
426     *next = desc->next;
427     /* Make sure compiler knows to grab that: we don't want it changing! */
428     smp_wmb();
429 
430     if (*next >= max) {
431         virtio_error(vdev, "Desc next is %u", *next);
432         return VIRTQUEUE_READ_DESC_ERROR;
433     }
434 
435     vring_desc_read(vdev, desc, desc_pa, *next);
436     return VIRTQUEUE_READ_DESC_MORE;
437 }
438 
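/*
 * Walk the available descriptor chains (following indirect tables) without
 * popping them, accumulating how many bytes the device could write
 * (*in_bytes) and read (*out_bytes).  The walk stops early once both
 * max_in_bytes and max_out_bytes have been reached.  On a malformed ring the
 * device is marked broken and both totals are reported as zero.
 */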
439 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
440                                unsigned int *out_bytes,
441                                unsigned max_in_bytes, unsigned max_out_bytes)
442 {
443     unsigned int idx;
444     unsigned int total_bufs, in_total, out_total;
445     int rc;
446 
447     idx = vq->last_avail_idx;
448 
449     total_bufs = in_total = out_total = 0;
450     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
451         VirtIODevice *vdev = vq->vdev;
452         unsigned int max, num_bufs, indirect = 0;
453         VRingDesc desc;
454         hwaddr desc_pa;
455         unsigned int i;
456 
457         max = vq->vring.num;
458         num_bufs = total_bufs;
459 
460         if (!virtqueue_get_head(vq, idx++, &i)) {
461             goto err;
462         }
463 
464         desc_pa = vq->vring.desc;
465         vring_desc_read(vdev, &desc, desc_pa, i);
466 
467         if (desc.flags & VRING_DESC_F_INDIRECT) {
468             if (desc.len % sizeof(VRingDesc)) {
469                 virtio_error(vdev, "Invalid size for indirect buffer table");
470                 goto err;
471             }
472 
473             /* If we've got too many, that implies a descriptor loop. */
474             if (num_bufs >= max) {
475                 virtio_error(vdev, "Looped descriptor");
476                 goto err;
477             }
478 
479             /* loop over the indirect descriptor table */
480             indirect = 1;
481             max = desc.len / sizeof(VRingDesc);
482             desc_pa = desc.addr;
483             num_bufs = i = 0;
484             vring_desc_read(vdev, &desc, desc_pa, i);
485         }
486 
487         do {
488             /* If we've got too many, that implies a descriptor loop. */
489             if (++num_bufs > max) {
490                 virtio_error(vdev, "Looped descriptor");
491                 goto err;
492             }
493 
494             if (desc.flags & VRING_DESC_F_WRITE) {
495                 in_total += desc.len;
496             } else {
497                 out_total += desc.len;
498             }
499             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
500                 goto done;
501             }
502 
503             rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
504         } while (rc == VIRTQUEUE_READ_DESC_MORE);
505 
506         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
507             goto err;
508         }
509 
510         if (!indirect)
511             total_bufs = num_bufs;
512         else
513             total_bufs++;
514     }
515 
516     if (rc < 0) {
517         goto err;
518     }
519 
520 done:
521     if (in_bytes) {
522         *in_bytes = in_total;
523     }
524     if (out_bytes) {
525         *out_bytes = out_total;
526     }
527     return;
528 
529 err:
530     in_total = out_total = 0;
531     goto done;
532 }
533 
534 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
535                           unsigned int out_bytes)
536 {
537     unsigned int in_total, out_total;
538 
539     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
540     return in_bytes <= in_total && out_bytes <= out_total;
541 }
542 
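/*
 * Map the guest-physical range described by one descriptor into host iovec
 * entries, appending to iov[]/addr[] starting at *p_num_sg.  dma_memory_map()
 * may map less than requested, so a single descriptor can consume several
 * scatter-gather entries.
 */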
543 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
544                                hwaddr *addr, struct iovec *iov,
545                                unsigned int max_num_sg, bool is_write,
546                                hwaddr pa, size_t sz)
547 {
548     bool ok = false;
549     unsigned num_sg = *p_num_sg;
550     assert(num_sg <= max_num_sg);
551 
552     if (!sz) {
553         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
554         goto out;
555     }
556 
557     while (sz) {
558         hwaddr len = sz;
559 
560         if (num_sg == max_num_sg) {
561             virtio_error(vdev, "virtio: too many write descriptors in "
562                                "indirect table");
563             goto out;
564         }
565 
566         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
567                                               is_write ?
568                                               DMA_DIRECTION_FROM_DEVICE :
569                                               DMA_DIRECTION_TO_DEVICE);
570         if (!iov[num_sg].iov_base) {
571             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
572             goto out;
573         }
574 
575         iov[num_sg].iov_len = len;
576         addr[num_sg] = pa;
577 
578         sz -= len;
579         pa += len;
580         num_sg++;
581     }
582     ok = true;
583 
584 out:
585     *p_num_sg = num_sg;
586     return ok;
587 }
588 
589 /* Only used by error code paths before we have a VirtQueueElement (therefore
590  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
591  * yet.
592  */
593 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
594                                     struct iovec *iov)
595 {
596     unsigned int i;
597 
598     for (i = 0; i < out_num + in_num; i++) {
599         int is_write = i >= out_num;
600 
601         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
602         iov++;
603     }
604 }
605 
606 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
607                                 hwaddr *addr, unsigned int *num_sg,
608                                 unsigned int max_size, int is_write)
609 {
610     unsigned int i;
611     hwaddr len;
612 
613     /* Note: this function MUST validate input, some callers
614      * are passing in num_sg values received over the network.
615      */
616     /* TODO: teach all callers that this can fail, and return failure instead
617      * of asserting here.
618      * When we do, we might be able to re-enable NDEBUG below.
619      */
620 #ifdef NDEBUG
621 #error building with NDEBUG is not supported
622 #endif
623     assert(*num_sg <= max_size);
624 
625     for (i = 0; i < *num_sg; i++) {
626         len = sg[i].iov_len;
627         sg[i].iov_base = dma_memory_map(vdev->dma_as,
628                                         addr[i], &len, is_write ?
629                                         DMA_DIRECTION_FROM_DEVICE :
630                                         DMA_DIRECTION_TO_DEVICE);
631         if (!sg[i].iov_base) {
632             error_report("virtio: error trying to map MMIO memory");
633             exit(1);
634         }
635         if (len != sg[i].iov_len) {
636             error_report("virtio: unexpected memory split");
637             exit(1);
638         }
639     }
640 }
641 
642 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
643 {
644     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num,
645                         MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
646                         1);
647     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num,
648                         MIN(ARRAY_SIZE(elem->out_sg),
649                         ARRAY_SIZE(elem->out_addr)),
650                         0);
651 }
652 
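/*
 * Allocate a VirtQueueElement together with its variable-sized in/out address
 * and iovec arrays in a single allocation.  sz is the size of the device's
 * element structure, which must begin with a VirtQueueElement; the arrays are
 * laid out (suitably aligned) after those sz bytes.
 */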
653 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
654 {
655     VirtQueueElement *elem;
656     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
657     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
658     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
659     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
660     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
661     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
662 
663     assert(sz >= sizeof(VirtQueueElement));
664     elem = g_malloc(out_sg_end);
665     elem->out_num = out_num;
666     elem->in_num = in_num;
667     elem->in_addr = (void *)elem + in_addr_ofs;
668     elem->out_addr = (void *)elem + out_addr_ofs;
669     elem->in_sg = (void *)elem + in_sg_ofs;
670     elem->out_sg = (void *)elem + out_sg_ofs;
671     return elem;
672 }
673 
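/*
 * Pop the next available element, mapping its descriptors into host memory.
 * Returns NULL if the queue is empty or the device is broken.  sz is the size
 * of the structure to allocate, which must begin with a VirtQueueElement;
 * free the result with g_free() once it has been pushed or detached.
 *
 * A minimal usage sketch (hypothetical device handler, not part of this
 * file):
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (!elem) {
 *         return;                         (queue empty or device broken)
 *     }
 *     ... consume elem->out_sg, produce into elem->in_sg ...
 *     virtqueue_push(vq, elem, written_len);
 *     virtio_notify(vdev, vq);
 *     g_free(elem);
 */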
674 void *virtqueue_pop(VirtQueue *vq, size_t sz)
675 {
676     unsigned int i, head, max;
677     hwaddr desc_pa = vq->vring.desc;
678     VirtIODevice *vdev = vq->vdev;
679     VirtQueueElement *elem;
680     unsigned out_num, in_num;
681     hwaddr addr[VIRTQUEUE_MAX_SIZE];
682     struct iovec iov[VIRTQUEUE_MAX_SIZE];
683     VRingDesc desc;
684     int rc;
685 
686     if (unlikely(vdev->broken)) {
687         return NULL;
688     }
689     if (virtio_queue_empty(vq)) {
690         return NULL;
691     }
692     /* Needed after virtio_queue_empty(), see comment in
693      * virtqueue_num_heads(). */
694     smp_rmb();
695 
696     /* When we start there are neither input nor output buffers. */
697     out_num = in_num = 0;
698 
699     max = vq->vring.num;
700 
701     if (vq->inuse >= vq->vring.num) {
702         virtio_error(vdev, "Virtqueue size exceeded");
703         return NULL;
704     }
705 
706     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
707         return NULL;
708     }
709 
710     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
711         vring_set_avail_event(vq, vq->last_avail_idx);
712     }
713 
714     i = head;
715     vring_desc_read(vdev, &desc, desc_pa, i);
716     if (desc.flags & VRING_DESC_F_INDIRECT) {
717         if (desc.len % sizeof(VRingDesc)) {
718             virtio_error(vdev, "Invalid size for indirect buffer table");
719             return NULL;
720         }
721 
722         /* loop over the indirect descriptor table */
723         max = desc.len / sizeof(VRingDesc);
724         desc_pa = desc.addr;
725         i = 0;
726         vring_desc_read(vdev, &desc, desc_pa, i);
727     }
728 
729     /* Collect all the descriptors */
730     do {
731         bool map_ok;
732 
733         if (desc.flags & VRING_DESC_F_WRITE) {
734             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
735                                         iov + out_num,
736                                         VIRTQUEUE_MAX_SIZE - out_num, true,
737                                         desc.addr, desc.len);
738         } else {
739             if (in_num) {
740                 virtio_error(vdev, "Incorrect order for descriptors");
741                 goto err_undo_map;
742             }
743             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
744                                         VIRTQUEUE_MAX_SIZE, false,
745                                         desc.addr, desc.len);
746         }
747         if (!map_ok) {
748             goto err_undo_map;
749         }
750 
751         /* If we've got too many, that implies a descriptor loop. */
752         if ((in_num + out_num) > max) {
753             virtio_error(vdev, "Looped descriptor");
754             goto err_undo_map;
755         }
756 
757         rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
758     } while (rc == VIRTQUEUE_READ_DESC_MORE);
759 
760     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
761         goto err_undo_map;
762     }
763 
764     /* Now copy what we have collected and mapped */
765     elem = virtqueue_alloc_element(sz, out_num, in_num);
766     elem->index = head;
767     for (i = 0; i < out_num; i++) {
768         elem->out_addr[i] = addr[i];
769         elem->out_sg[i] = iov[i];
770     }
771     for (i = 0; i < in_num; i++) {
772         elem->in_addr[i] = addr[out_num + i];
773         elem->in_sg[i] = iov[out_num + i];
774     }
775 
776     vq->inuse++;
777 
778     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
779     return elem;
780 
781 err_undo_map:
782     virtqueue_undo_map_desc(out_num, in_num, iov);
783     return NULL;
784 }
785 
786 /* Reading and writing a structure directly to QEMUFile is *awful*, but
787  * it is what QEMU has always done by mistake.  We can change it sooner
788  * or later by bumping the version number of the affected vm states.
789  * In the meanwhile, since the in-memory layout of VirtQueueElement
790  * has changed, we need to marshal to and from the layout that was
791  * used before the change.
792  */
793 typedef struct VirtQueueElementOld {
794     unsigned int index;
795     unsigned int out_num;
796     unsigned int in_num;
797     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
798     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
799     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
800     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
801 } VirtQueueElementOld;
802 
803 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
804 {
805     VirtQueueElement *elem;
806     VirtQueueElementOld data;
807     int i;
808 
809     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
810 
811     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
812     elem->index = data.index;
813 
814     for (i = 0; i < elem->in_num; i++) {
815         elem->in_addr[i] = data.in_addr[i];
816     }
817 
818     for (i = 0; i < elem->out_num; i++) {
819         elem->out_addr[i] = data.out_addr[i];
820     }
821 
822     for (i = 0; i < elem->in_num; i++) {
823         /* Base is overwritten by virtqueue_map.  */
824         elem->in_sg[i].iov_base = 0;
825         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
826     }
827 
828     for (i = 0; i < elem->out_num; i++) {
829         /* Base is overwritten by virtqueue_map.  */
830         elem->out_sg[i].iov_base = 0;
831         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
832     }
833 
834     virtqueue_map(vdev, elem);
835     return elem;
836 }
837 
838 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
839 {
840     VirtQueueElementOld data;
841     int i;
842 
843     memset(&data, 0, sizeof(data));
844     data.index = elem->index;
845     data.in_num = elem->in_num;
846     data.out_num = elem->out_num;
847 
848     for (i = 0; i < elem->in_num; i++) {
849         data.in_addr[i] = elem->in_addr[i];
850     }
851 
852     for (i = 0; i < elem->out_num; i++) {
853         data.out_addr[i] = elem->out_addr[i];
854     }
855 
856     for (i = 0; i < elem->in_num; i++) {
857         /* Base is overwritten by virtqueue_map when loading.  Do not
858          * save it, as it would leak the QEMU address space layout.  */
859         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
860     }
861 
862     for (i = 0; i < elem->out_num; i++) {
863         /* Do not save iov_base as above.  */
864         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
865     }
866     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
867 }
868 
869 /* virtio device */
870 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
871 {
872     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
873     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
874 
875     if (unlikely(vdev->broken)) {
876         return;
877     }
878 
879     if (k->notify) {
880         k->notify(qbus->parent, vector);
881     }
882 }
883 
884 void virtio_update_irq(VirtIODevice *vdev)
885 {
886     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
887 }
888 
889 static int virtio_validate_features(VirtIODevice *vdev)
890 {
891     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
892 
893     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
894         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
895         return -EFAULT;
896     }
897 
898     if (k->validate_features) {
899         return k->validate_features(vdev);
900     } else {
901         return 0;
902     }
903 }
904 
905 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
906 {
907     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
908     trace_virtio_set_status(vdev, val);
909 
910     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
911         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
912             val & VIRTIO_CONFIG_S_FEATURES_OK) {
913             int ret = virtio_validate_features(vdev);
914 
915             if (ret) {
916                 return ret;
917             }
918         }
919     }
920     if (k->set_status) {
921         k->set_status(vdev, val);
922     }
923     vdev->status = val;
924     return 0;
925 }
926 
927 bool target_words_bigendian(void);
928 static enum virtio_device_endian virtio_default_endian(void)
929 {
930     if (target_words_bigendian()) {
931         return VIRTIO_DEVICE_ENDIAN_BIG;
932     } else {
933         return VIRTIO_DEVICE_ENDIAN_LITTLE;
934     }
935 }
936 
937 static enum virtio_device_endian virtio_current_cpu_endian(void)
938 {
939     CPUClass *cc = CPU_GET_CLASS(current_cpu);
940 
941     if (cc->virtio_is_big_endian(current_cpu)) {
942         return VIRTIO_DEVICE_ENDIAN_BIG;
943     } else {
944         return VIRTIO_DEVICE_ENDIAN_LITTLE;
945     }
946 }
947 
948 void virtio_reset(void *opaque)
949 {
950     VirtIODevice *vdev = opaque;
951     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
952     int i;
953 
954     virtio_set_status(vdev, 0);
955     if (current_cpu) {
956         /* Guest initiated reset */
957         vdev->device_endian = virtio_current_cpu_endian();
958     } else {
959         /* System reset */
960         vdev->device_endian = virtio_default_endian();
961     }
962 
963     if (k->reset) {
964         k->reset(vdev);
965     }
966 
967     vdev->broken = false;
968     vdev->guest_features = 0;
969     vdev->queue_sel = 0;
970     vdev->status = 0;
971     atomic_set(&vdev->isr, 0);
972     vdev->config_vector = VIRTIO_NO_VECTOR;
973     virtio_notify_vector(vdev, vdev->config_vector);
974 
975     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
976         vdev->vq[i].vring.desc = 0;
977         vdev->vq[i].vring.avail = 0;
978         vdev->vq[i].vring.used = 0;
979         vdev->vq[i].last_avail_idx = 0;
980         vdev->vq[i].shadow_avail_idx = 0;
981         vdev->vq[i].used_idx = 0;
982         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
983         vdev->vq[i].signalled_used = 0;
984         vdev->vq[i].signalled_used_valid = false;
985         vdev->vq[i].notification_disabled = 0;
986         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
987         vdev->vq[i].inuse = 0;
988     }
989 }
990 
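/*
 * Config space accessors.  The virtio_config_read/write helpers below use the
 * guest's natural endianness (ldub_p/lduw_p/ldl_p and friends) for legacy
 * devices, while the virtio_config_modern_ variants further down always use
 * little-endian as virtio 1.0 requires.  All of them bounds-check against
 * config_len and call the device class's get_config/set_config.
 */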
991 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
992 {
993     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
994     uint8_t val;
995 
996     if (addr + sizeof(val) > vdev->config_len) {
997         return (uint32_t)-1;
998     }
999 
1000     k->get_config(vdev, vdev->config);
1001 
1002     val = ldub_p(vdev->config + addr);
1003     return val;
1004 }
1005 
1006 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
1007 {
1008     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1009     uint16_t val;
1010 
1011     if (addr + sizeof(val) > vdev->config_len) {
1012         return (uint32_t)-1;
1013     }
1014 
1015     k->get_config(vdev, vdev->config);
1016 
1017     val = lduw_p(vdev->config + addr);
1018     return val;
1019 }
1020 
1021 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
1022 {
1023     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1024     uint32_t val;
1025 
1026     if (addr + sizeof(val) > vdev->config_len) {
1027         return (uint32_t)-1;
1028     }
1029 
1030     k->get_config(vdev, vdev->config);
1031 
1032     val = ldl_p(vdev->config + addr);
1033     return val;
1034 }
1035 
1036 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1037 {
1038     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1039     uint8_t val = data;
1040 
1041     if (addr + sizeof(val) > vdev->config_len) {
1042         return;
1043     }
1044 
1045     stb_p(vdev->config + addr, val);
1046 
1047     if (k->set_config) {
1048         k->set_config(vdev, vdev->config);
1049     }
1050 }
1051 
1052 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1053 {
1054     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1055     uint16_t val = data;
1056 
1057     if (addr + sizeof(val) > vdev->config_len) {
1058         return;
1059     }
1060 
1061     stw_p(vdev->config + addr, val);
1062 
1063     if (k->set_config) {
1064         k->set_config(vdev, vdev->config);
1065     }
1066 }
1067 
1068 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1069 {
1070     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1071     uint32_t val = data;
1072 
1073     if (addr + sizeof(val) > vdev->config_len) {
1074         return;
1075     }
1076 
1077     stl_p(vdev->config + addr, val);
1078 
1079     if (k->set_config) {
1080         k->set_config(vdev, vdev->config);
1081     }
1082 }
1083 
1084 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
1085 {
1086     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1087     uint8_t val;
1088 
1089     if (addr + sizeof(val) > vdev->config_len) {
1090         return (uint32_t)-1;
1091     }
1092 
1093     k->get_config(vdev, vdev->config);
1094 
1095     val = ldub_p(vdev->config + addr);
1096     return val;
1097 }
1098 
1099 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
1100 {
1101     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1102     uint16_t val;
1103 
1104     if (addr + sizeof(val) > vdev->config_len) {
1105         return (uint32_t)-1;
1106     }
1107 
1108     k->get_config(vdev, vdev->config);
1109 
1110     val = lduw_le_p(vdev->config + addr);
1111     return val;
1112 }
1113 
1114 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
1115 {
1116     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1117     uint32_t val;
1118 
1119     if (addr + sizeof(val) > vdev->config_len) {
1120         return (uint32_t)-1;
1121     }
1122 
1123     k->get_config(vdev, vdev->config);
1124 
1125     val = ldl_le_p(vdev->config + addr);
1126     return val;
1127 }
1128 
1129 void virtio_config_modern_writeb(VirtIODevice *vdev,
1130                                  uint32_t addr, uint32_t data)
1131 {
1132     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1133     uint8_t val = data;
1134 
1135     if (addr + sizeof(val) > vdev->config_len) {
1136         return;
1137     }
1138 
1139     stb_p(vdev->config + addr, val);
1140 
1141     if (k->set_config) {
1142         k->set_config(vdev, vdev->config);
1143     }
1144 }
1145 
1146 void virtio_config_modern_writew(VirtIODevice *vdev,
1147                                  uint32_t addr, uint32_t data)
1148 {
1149     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1150     uint16_t val = data;
1151 
1152     if (addr + sizeof(val) > vdev->config_len) {
1153         return;
1154     }
1155 
1156     stw_le_p(vdev->config + addr, val);
1157 
1158     if (k->set_config) {
1159         k->set_config(vdev, vdev->config);
1160     }
1161 }
1162 
1163 void virtio_config_modern_writel(VirtIODevice *vdev,
1164                                  uint32_t addr, uint32_t data)
1165 {
1166     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1167     uint32_t val = data;
1168 
1169     if (addr + sizeof(val) > vdev->config_len) {
1170         return;
1171     }
1172 
1173     stl_le_p(vdev->config + addr, val);
1174 
1175     if (k->set_config) {
1176         k->set_config(vdev, vdev->config);
1177     }
1178 }
1179 
1180 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1181 {
1182     vdev->vq[n].vring.desc = addr;
1183     virtio_queue_update_rings(vdev, n);
1184 }
1185 
1186 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1187 {
1188     return vdev->vq[n].vring.desc;
1189 }
1190 
1191 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1192                             hwaddr avail, hwaddr used)
1193 {
1194     vdev->vq[n].vring.desc = desc;
1195     vdev->vq[n].vring.avail = avail;
1196     vdev->vq[n].vring.used = used;
1197 }
1198 
1199 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1200 {
1201     /* Don't allow guest to flip queue between existent and
1202      * nonexistent states, or to set it to an invalid size.
1203      */
1204     if (!!num != !!vdev->vq[n].vring.num ||
1205         num > VIRTQUEUE_MAX_SIZE ||
1206         num < 0) {
1207         return;
1208     }
1209     vdev->vq[n].vring.num = num;
1210 }
1211 
1212 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1213 {
1214     return QLIST_FIRST(&vdev->vector_queues[vector]);
1215 }
1216 
1217 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1218 {
1219     return QLIST_NEXT(vq, node);
1220 }
1221 
1222 int virtio_queue_get_num(VirtIODevice *vdev, int n)
1223 {
1224     return vdev->vq[n].vring.num;
1225 }
1226 
1227 int virtio_get_num_queues(VirtIODevice *vdev)
1228 {
1229     int i;
1230 
1231     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1232         if (!virtio_queue_get_num(vdev, i)) {
1233             break;
1234         }
1235     }
1236 
1237     return i;
1238 }
1239 
1240 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1241 {
1242     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1243     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1244 
1245     /* virtio-1 compliant devices cannot change the alignment */
1246     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1247         error_report("tried to modify queue alignment for virtio-1 device");
1248         return;
1249     }
1250     /* Check that the transport told us it was going to do this
1251      * (so a buggy transport will immediately assert rather than
1252      * silently failing to migrate this state)
1253      */
1254     assert(k->has_variable_vring_alignment);
1255 
1256     vdev->vq[n].vring.align = align;
1257     virtio_queue_update_rings(vdev, n);
1258 }
1259 
1260 static void virtio_queue_notify_aio_vq(VirtQueue *vq)
1261 {
1262     if (vq->vring.desc && vq->handle_aio_output) {
1263         VirtIODevice *vdev = vq->vdev;
1264 
1265         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1266         vq->handle_aio_output(vdev, vq);
1267     }
1268 }
1269 
1270 static void virtio_queue_notify_vq(VirtQueue *vq)
1271 {
1272     if (vq->vring.desc && vq->handle_output) {
1273         VirtIODevice *vdev = vq->vdev;
1274 
1275         if (unlikely(vdev->broken)) {
1276             return;
1277         }
1278 
1279         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1280         vq->handle_output(vdev, vq);
1281     }
1282 }
1283 
1284 void virtio_queue_notify(VirtIODevice *vdev, int n)
1285 {
1286     virtio_queue_notify_vq(&vdev->vq[n]);
1287 }
1288 
1289 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1290 {
1291     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1292         VIRTIO_NO_VECTOR;
1293 }
1294 
1295 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1296 {
1297     VirtQueue *vq = &vdev->vq[n];
1298 
1299     if (n < VIRTIO_QUEUE_MAX) {
1300         if (vdev->vector_queues &&
1301             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1302             QLIST_REMOVE(vq, node);
1303         }
1304         vdev->vq[n].vector = vector;
1305         if (vdev->vector_queues &&
1306             vector != VIRTIO_NO_VECTOR) {
1307             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1308         }
1309     }
1310 }
1311 
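/*
 * Register a new virtqueue in the first unused slot, with the default
 * VIRTIO_PCI_VRING_ALIGN alignment.  Aborts if all VIRTIO_QUEUE_MAX slots are
 * already in use or queue_size exceeds VIRTQUEUE_MAX_SIZE.
 */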
1312 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1313                             VirtIOHandleOutput handle_output)
1314 {
1315     int i;
1316 
1317     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1318         if (vdev->vq[i].vring.num == 0)
1319             break;
1320     }
1321 
1322     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1323         abort();
1324 
1325     vdev->vq[i].vring.num = queue_size;
1326     vdev->vq[i].vring.num_default = queue_size;
1327     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1328     vdev->vq[i].handle_output = handle_output;
1329     vdev->vq[i].handle_aio_output = NULL;
1330 
1331     return &vdev->vq[i];
1332 }
1333 
1334 void virtio_del_queue(VirtIODevice *vdev, int n)
1335 {
1336     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1337         abort();
1338     }
1339 
1340     vdev->vq[n].vring.num = 0;
1341     vdev->vq[n].vring.num_default = 0;
1342 }
1343 
1344 static void virtio_set_isr(VirtIODevice *vdev, int value)
1345 {
1346     uint8_t old = atomic_read(&vdev->isr);
1347 
1348     /* Do not write ISR if it does not change, so that its cacheline remains
1349      * shared in the common case where the guest does not read it.
1350      */
1351     if ((old & value) != value) {
1352         atomic_or(&vdev->isr, value);
1353     }
1354 }
1355 
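/*
 * Decide whether the guest needs to be interrupted for this queue.  An empty
 * queue is always signalled if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated.
 * Without VIRTIO_RING_F_EVENT_IDX this just honours
 * VRING_AVAIL_F_NO_INTERRUPT; with it, vring_need_event() compares the
 * guest's used_event index against the old and new used indices so the guest
 * is only interrupted when it asked to be.
 */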
1356 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1357 {
1358     uint16_t old, new;
1359     bool v;
1360     /* We need to expose used array entries before checking used event. */
1361     smp_mb();
1362     /* Always notify when the queue is empty, if the feature was negotiated */
1363     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1364         !vq->inuse && virtio_queue_empty(vq)) {
1365         return true;
1366     }
1367 
1368     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1369         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1370     }
1371 
1372     v = vq->signalled_used_valid;
1373     vq->signalled_used_valid = true;
1374     old = vq->signalled_used;
1375     new = vq->signalled_used = vq->used_idx;
1376     return !v || vring_need_event(vring_get_used_event(vq), new, old);
1377 }
1378 
1379 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
1380 {
1381     if (!virtio_should_notify(vdev, vq)) {
1382         return;
1383     }
1384 
1385     trace_virtio_notify_irqfd(vdev, vq);
1386 
1387     /*
1388      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
1389      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
1390      * incorrectly polling this bit during crashdump and hibernation
1391      * in MSI mode, causing a hang if this bit is never updated.
1392      * Recent releases of Windows do not really shut down, but rather
1393      * log out and hibernate to make the next startup faster.  Hence,
1394      * this manifested as a more serious hang during shutdown with such
1395      * drivers.  The next driver release, from 2016, fixed this problem, so
1396      * working around it is not a must, but it's easy to do, so let's do it
1397      * here.
1398      *
1399      * Note: it's safe to update ISR from any thread as it was switched
1400      * to an atomic operation.
1401      */
1402     virtio_set_isr(vq->vdev, 0x1);
1403     event_notifier_set(&vq->guest_notifier);
1404 }
1405 
1406 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1407 {
1408     if (!virtio_should_notify(vdev, vq)) {
1409         return;
1410     }
1411 
1412     trace_virtio_notify(vdev, vq);
1413     virtio_set_isr(vq->vdev, 0x1);
1414     virtio_notify_vector(vdev, vq->vector);
1415 }
1416 
1417 void virtio_notify_config(VirtIODevice *vdev)
1418 {
1419     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1420         return;
1421 
1422     virtio_set_isr(vdev, 0x3);
1423     vdev->generation++;
1424     virtio_notify_vector(vdev, vdev->config_vector);
1425 }
1426 
1427 static bool virtio_device_endian_needed(void *opaque)
1428 {
1429     VirtIODevice *vdev = opaque;
1430 
1431     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1432     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1433         return vdev->device_endian != virtio_default_endian();
1434     }
1435     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1436     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1437 }
1438 
1439 static bool virtio_64bit_features_needed(void *opaque)
1440 {
1441     VirtIODevice *vdev = opaque;
1442 
1443     return (vdev->host_features >> 32) != 0;
1444 }
1445 
1446 static bool virtio_virtqueue_needed(void *opaque)
1447 {
1448     VirtIODevice *vdev = opaque;
1449 
1450     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1451 }
1452 
1453 static bool virtio_ringsize_needed(void *opaque)
1454 {
1455     VirtIODevice *vdev = opaque;
1456     int i;
1457 
1458     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1459         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1460             return true;
1461         }
1462     }
1463     return false;
1464 }
1465 
1466 static bool virtio_extra_state_needed(void *opaque)
1467 {
1468     VirtIODevice *vdev = opaque;
1469     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1470     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1471 
1472     return k->has_extra_state &&
1473         k->has_extra_state(qbus->parent);
1474 }
1475 
1476 static bool virtio_broken_needed(void *opaque)
1477 {
1478     VirtIODevice *vdev = opaque;
1479 
1480     return vdev->broken;
1481 }
1482 
1483 static const VMStateDescription vmstate_virtqueue = {
1484     .name = "virtqueue_state",
1485     .version_id = 1,
1486     .minimum_version_id = 1,
1487     .fields = (VMStateField[]) {
1488         VMSTATE_UINT64(vring.avail, struct VirtQueue),
1489         VMSTATE_UINT64(vring.used, struct VirtQueue),
1490         VMSTATE_END_OF_LIST()
1491     }
1492 };
1493 
1494 static const VMStateDescription vmstate_virtio_virtqueues = {
1495     .name = "virtio/virtqueues",
1496     .version_id = 1,
1497     .minimum_version_id = 1,
1498     .needed = &virtio_virtqueue_needed,
1499     .fields = (VMStateField[]) {
1500         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1501                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1502         VMSTATE_END_OF_LIST()
1503     }
1504 };
1505 
1506 static const VMStateDescription vmstate_ringsize = {
1507     .name = "ringsize_state",
1508     .version_id = 1,
1509     .minimum_version_id = 1,
1510     .fields = (VMStateField[]) {
1511         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1512         VMSTATE_END_OF_LIST()
1513     }
1514 };
1515 
1516 static const VMStateDescription vmstate_virtio_ringsize = {
1517     .name = "virtio/ringsize",
1518     .version_id = 1,
1519     .minimum_version_id = 1,
1520     .needed = &virtio_ringsize_needed,
1521     .fields = (VMStateField[]) {
1522         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1523                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1524         VMSTATE_END_OF_LIST()
1525     }
1526 };
1527 
1528 static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1529 {
1530     VirtIODevice *vdev = pv;
1531     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1532     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1533 
1534     if (!k->load_extra_state) {
1535         return -1;
1536     } else {
1537         return k->load_extra_state(qbus->parent, f);
1538     }
1539 }
1540 
1541 static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1542 {
1543     VirtIODevice *vdev = pv;
1544     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1545     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1546 
1547     k->save_extra_state(qbus->parent, f);
1548 }
1549 
1550 static const VMStateInfo vmstate_info_extra_state = {
1551     .name = "virtqueue_extra_state",
1552     .get = get_extra_state,
1553     .put = put_extra_state,
1554 };
1555 
1556 static const VMStateDescription vmstate_virtio_extra_state = {
1557     .name = "virtio/extra_state",
1558     .version_id = 1,
1559     .minimum_version_id = 1,
1560     .needed = &virtio_extra_state_needed,
1561     .fields = (VMStateField[]) {
1562         {
1563             .name         = "extra_state",
1564             .version_id   = 0,
1565             .field_exists = NULL,
1566             .size         = 0,
1567             .info         = &vmstate_info_extra_state,
1568             .flags        = VMS_SINGLE,
1569             .offset       = 0,
1570         },
1571         VMSTATE_END_OF_LIST()
1572     }
1573 };
1574 
1575 static const VMStateDescription vmstate_virtio_device_endian = {
1576     .name = "virtio/device_endian",
1577     .version_id = 1,
1578     .minimum_version_id = 1,
1579     .needed = &virtio_device_endian_needed,
1580     .fields = (VMStateField[]) {
1581         VMSTATE_UINT8(device_endian, VirtIODevice),
1582         VMSTATE_END_OF_LIST()
1583     }
1584 };
1585 
1586 static const VMStateDescription vmstate_virtio_64bit_features = {
1587     .name = "virtio/64bit_features",
1588     .version_id = 1,
1589     .minimum_version_id = 1,
1590     .needed = &virtio_64bit_features_needed,
1591     .fields = (VMStateField[]) {
1592         VMSTATE_UINT64(guest_features, VirtIODevice),
1593         VMSTATE_END_OF_LIST()
1594     }
1595 };
1596 
1597 static const VMStateDescription vmstate_virtio_broken = {
1598     .name = "virtio/broken",
1599     .version_id = 1,
1600     .minimum_version_id = 1,
1601     .needed = &virtio_broken_needed,
1602     .fields = (VMStateField[]) {
1603         VMSTATE_BOOL(broken, VirtIODevice),
1604         VMSTATE_END_OF_LIST()
1605     }
1606 };
1607 
1608 static const VMStateDescription vmstate_virtio = {
1609     .name = "virtio",
1610     .version_id = 1,
1611     .minimum_version_id = 1,
1612     .minimum_version_id_old = 1,
1613     .fields = (VMStateField[]) {
1614         VMSTATE_END_OF_LIST()
1615     },
1616     .subsections = (const VMStateDescription*[]) {
1617         &vmstate_virtio_device_endian,
1618         &vmstate_virtio_64bit_features,
1619         &vmstate_virtio_virtqueues,
1620         &vmstate_virtio_ringsize,
1621         &vmstate_virtio_broken,
1622         &vmstate_virtio_extra_state,
1623         NULL
1624     }
1625 };
1626 
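/*
 * Save the common virtio state: transport config, status/ISR/queue_sel, the
 * low 32 bits of the guest features, the config space, and per-queue size,
 * alignment, descriptor address and last_avail_idx.  Device-specific state
 * follows, then the vmstate subsections (64-bit features, endianness, ring
 * sizes, etc.).
 */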
1627 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1628 {
1629     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1630     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1631     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1632     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1633     int i;
1634 
1635     if (k->save_config) {
1636         k->save_config(qbus->parent, f);
1637     }
1638 
1639     qemu_put_8s(f, &vdev->status);
1640     qemu_put_8s(f, &vdev->isr);
1641     qemu_put_be16s(f, &vdev->queue_sel);
1642     qemu_put_be32s(f, &guest_features_lo);
1643     qemu_put_be32(f, vdev->config_len);
1644     qemu_put_buffer(f, vdev->config, vdev->config_len);
1645 
1646     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1647         if (vdev->vq[i].vring.num == 0)
1648             break;
1649     }
1650 
1651     qemu_put_be32(f, i);
1652 
1653     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1654         if (vdev->vq[i].vring.num == 0)
1655             break;
1656 
1657         qemu_put_be32(f, vdev->vq[i].vring.num);
1658         if (k->has_variable_vring_alignment) {
1659             qemu_put_be32(f, vdev->vq[i].vring.align);
1660         }
1661         /* XXX virtio-1 devices */
1662         qemu_put_be64(f, vdev->vq[i].vring.desc);
1663         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1664         if (k->save_queue) {
1665             k->save_queue(qbus->parent, i, f);
1666         }
1667     }
1668 
1669     if (vdc->save != NULL) {
1670         vdc->save(vdev, f);
1671     }
1672 
1673     if (vdc->vmsd) {
1674         vmstate_save_state(f, vdc->vmsd, vdev, NULL);
1675     }
1676 
1677     /* Subsections */
1678     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1679 }
1680 
1681 /* A wrapper for use as a VMState .put function */
1682 static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
1683 {
1684     virtio_save(VIRTIO_DEVICE(opaque), f);
1685 }
1686 
1687 /* A wrapper for use as a VMState .get function */
1688 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
1689 {
1690     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
1691     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
1692 
1693     return virtio_load(vdev, f, dc->vmsd->version_id);
1694 }
1695 
1696 const VMStateInfo  virtio_vmstate_info = {
1697     .name = "virtio",
1698     .get = virtio_device_get,
1699     .put = virtio_device_put,
1700 };
1701 
1702 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1703 {
1704     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1705     bool bad = (val & ~(vdev->host_features)) != 0;
1706 
1707     val &= vdev->host_features;
1708     if (k->set_features) {
1709         k->set_features(vdev, val);
1710     }
1711     vdev->guest_features = val;
1712     return bad ? -1 : 0;
1713 }
1714 
1715 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1716 {
1717     /*
1718      * The driver must not attempt to set features after feature negotiation
1719      * has finished.
1720      */
1721     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1722         return -EINVAL;
1723     }
1724     return virtio_set_features_nocheck(vdev, val);
1725 }
1726 
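/*
 * Counterpart of virtio_save().  The endianness is poisoned to
 * VIRTIO_DEVICE_ENDIAN_UNKNOWN until the subsections have been loaded, and
 * the loaded features and ring indices are sanity-checked against
 * host_features and the ring sizes before the device resumes.
 */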
1727 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1728 {
1729     int i, ret;
1730     int32_t config_len;
1731     uint32_t num;
1732     uint32_t features;
1733     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1734     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1735     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1736 
1737     /*
1738      * We poison the endianness to ensure it does not get used before
1739      * subsections have been loaded.
1740      */
1741     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1742 
1743     if (k->load_config) {
1744         ret = k->load_config(qbus->parent, f);
1745         if (ret)
1746             return ret;
1747     }
1748 
1749     qemu_get_8s(f, &vdev->status);
1750     qemu_get_8s(f, &vdev->isr);
1751     qemu_get_be16s(f, &vdev->queue_sel);
1752     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1753         return -1;
1754     }
1755     qemu_get_be32s(f, &features);
1756 
1757     /*
1758      * Temporarily set guest_features low bits - needed by
1759      * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
1760      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1761      *
1762      * Note: devices should always test host features in future - don't create
1763      * new dependencies like this.
1764      */
1765     vdev->guest_features = features;
1766 
1767     config_len = qemu_get_be32(f);
1768 
1769     /*
1770      * There are cases where the incoming config can be bigger or smaller
1771      * than what we have; so load what we have space for, and skip
1772      * any excess that's in the stream.
1773      */
1774     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1775 
1776     while (config_len > vdev->config_len) {
1777         qemu_get_byte(f);
1778         config_len--;
1779     }
1780 
1781     num = qemu_get_be32(f);
1782 
1783     if (num > VIRTIO_QUEUE_MAX) {
1784         error_report("Invalid number of virtqueues: 0x%x", num);
1785         return -1;
1786     }
1787 
1788     for (i = 0; i < num; i++) {
1789         vdev->vq[i].vring.num = qemu_get_be32(f);
1790         if (k->has_variable_vring_alignment) {
1791             vdev->vq[i].vring.align = qemu_get_be32(f);
1792         }
1793         vdev->vq[i].vring.desc = qemu_get_be64(f);
1794         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1795         vdev->vq[i].signalled_used_valid = false;
1796         vdev->vq[i].notification_disabled = 0;
1797 
1798         if (vdev->vq[i].vring.desc) {
1799             /* XXX virtio-1 devices */
1800             virtio_queue_update_rings(vdev, i);
1801         } else if (vdev->vq[i].last_avail_idx) {
1802             error_report("VQ %d address 0x0 "
1803                          "inconsistent with Host index 0x%x",
1804                          i, vdev->vq[i].last_avail_idx);
1805             return -1;
1806         }
1807         if (k->load_queue) {
1808             ret = k->load_queue(qbus->parent, i, f);
1809             if (ret) {
1810                 return ret;
1811             }
1811         }
1812     }
1813 
1814     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1815 
1816     if (vdc->load != NULL) {
1817         ret = vdc->load(vdev, f, version_id);
1818         if (ret) {
1819             return ret;
1820         }
1821     }
1822 
1823     if (vdc->vmsd) {
1824         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
1825         if (ret) {
1826             return ret;
1827         }
1828     }
1829 
1830     /* Subsections */
1831     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1832     if (ret) {
1833         return ret;
1834     }
1835 
1836     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1837         vdev->device_endian = virtio_default_endian();
1838     }
1839 
1840     if (virtio_64bit_features_needed(vdev)) {
1841         /*
1842          * Subsection load filled vdev->guest_features.  Run them
1843          * through virtio_set_features to sanity-check them against
1844          * host_features.
1845          */
1846         uint64_t features64 = vdev->guest_features;
1847         if (virtio_set_features_nocheck(vdev, features64) < 0) {
1848             error_report("Features 0x%" PRIx64 " unsupported. "
1849                          "Allowed features: 0x%" PRIx64,
1850                          features64, vdev->host_features);
1851             return -1;
1852         }
1853     } else {
1854         if (virtio_set_features_nocheck(vdev, features) < 0) {
1855             error_report("Features 0x%x unsupported. "
1856                          "Allowed features: 0x%" PRIx64,
1857                          features, vdev->host_features);
1858             return -1;
1859         }
1860     }
1861 
1862     for (i = 0; i < num; i++) {
1863         if (vdev->vq[i].vring.desc) {
1864             uint16_t nheads;
1865             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1866             /* Check it isn't doing strange things with descriptor numbers. */
1867             if (nheads > vdev->vq[i].vring.num) {
1868                 error_report("VQ %d size 0x%x Guest index 0x%x "
1869                              "inconsistent with Host index 0x%x: delta 0x%x",
1870                              i, vdev->vq[i].vring.num,
1871                              vring_avail_idx(&vdev->vq[i]),
1872                              vdev->vq[i].last_avail_idx, nheads);
1873                 return -1;
1874             }
1875             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1876             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1877 
1878             /*
1879              * Some devices migrate VirtQueueElements that have been popped
1880              * from the avail ring but not yet returned to the used ring.
1881              */
1882             vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
1883                                 vdev->vq[i].used_idx;
1884             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1885                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1886                              "used_idx 0x%x",
1887                              i, vdev->vq[i].vring.num,
1888                              vdev->vq[i].last_avail_idx,
1889                              vdev->vq[i].used_idx);
1890                 return -1;
1891             }
1892         }
1893     }
1894 
1895     return 0;
1896 }
1897 
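/*
 * Counterpart to virtio_init(): drop the vmstate change handler and free
 * the config space, the virtqueue array and the per-vector queue lists.
 * Called from a device's unrealize path.
 */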
1898 void virtio_cleanup(VirtIODevice *vdev)
1899 {
1900     qemu_del_vm_change_state_handler(vdev->vmstate);
1901     g_free(vdev->config);
1902     g_free(vdev->vq);
1903     g_free(vdev->vector_queues);
1904 }
1905 
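/*
 * Run-state callback registered by virtio_init().  When the VM resumes
 * and the guest had already set DRIVER_OK, the device status is
 * re-applied before the transport is told to start its backend; when the
 * VM stops, the transport is notified first and the status is re-applied
 * afterwards, so backends only run while the VM does.
 */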
1906 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1907 {
1908     VirtIODevice *vdev = opaque;
1909     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1910     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1911     bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1912     vdev->vm_running = running;
1913 
1914     if (backend_run) {
1915         virtio_set_status(vdev, vdev->status);
1916     }
1917 
1918     if (k->vmstate_change) {
1919         k->vmstate_change(qbus->parent, backend_run);
1920     }
1921 
1922     if (!backend_run) {
1923         virtio_set_status(vdev, vdev->status);
1924     }
1925 }
1926 
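/*
 * Helper for transport proxy objects (e.g. the virtio-*-pci wrappers):
 * initialise the embedded VirtIODevice, attach it as the QOM child
 * "virtio-backend" and alias its properties onto the proxy so they can
 * be set on the proxy directly.
 *
 * Illustrative sketch only - VirtIOFooPCI/TYPE_VIRTIO_FOO are placeholder
 * names, but the call pattern below mirrors how existing proxies use this
 * from their instance_init:
 *
 *     static void virtio_foo_pci_instance_init(Object *obj)
 *     {
 *         VirtIOFooPCI *dev = VIRTIO_FOO_PCI(obj);
 *
 *         virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 *                                     TYPE_VIRTIO_FOO);
 *     }
 */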
1927 void virtio_instance_init_common(Object *proxy_obj, void *data,
1928                                  size_t vdev_size, const char *vdev_name)
1929 {
1930     DeviceState *vdev = data;
1931 
1932     object_initialize(vdev, vdev_size, vdev_name);
1933     object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1934     object_unref(OBJECT(vdev));
1935     qdev_alias_all_properties(vdev, proxy_obj);
1936 }
1937 
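/*
 * Common initialisation called from a device's realize hook: allocate the
 * fixed-size VirtQueue array and the config space, reset status and
 * vectors, and register the vmstate change handler.  Queues themselves
 * are added afterwards with virtio_add_queue().
 *
 * Rough sketch of a device realize using it (VIRTIO_ID_FOO, foo_handle_vq
 * and the sizes are placeholders, not part of this file):
 *
 *     virtio_init(vdev, "virtio-foo", VIRTIO_ID_FOO, sizeof(struct foo_cfg));
 *     virtio_add_queue(vdev, 128, foo_handle_vq);
 */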
1938 void virtio_init(VirtIODevice *vdev, const char *name,
1939                  uint16_t device_id, size_t config_size)
1940 {
1941     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1942     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1943     int i;
1944     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1945 
1946     if (nvectors) {
1947         vdev->vector_queues =
1948             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1949     }
1950 
1951     vdev->device_id = device_id;
1952     vdev->status = 0;
1953     atomic_set(&vdev->isr, 0);
1954     vdev->queue_sel = 0;
1955     vdev->config_vector = VIRTIO_NO_VECTOR;
1956     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1957     vdev->vm_running = runstate_is_running();
1958     vdev->broken = false;
1959     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1960         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1961         vdev->vq[i].vdev = vdev;
1962         vdev->vq[i].queue_index = i;
1963     }
1964 
1965     vdev->name = name;
1966     vdev->config_len = config_size;
1967     if (vdev->config_len) {
1968         vdev->config = g_malloc0(config_size);
1969     } else {
1970         vdev->config = NULL;
1971     }
1972     vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1973                                                      vdev);
1974     vdev->device_endian = virtio_default_endian();
1975     vdev->use_guest_notifier_mask = true;
1976 }
1977 
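/*
 * The accessors below report the guest-physical placement and byte sizes
 * of the split-ring areas of a queue; transports and vhost use them to
 * map or forward the rings.
 */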
1978 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1979 {
1980     return vdev->vq[n].vring.desc;
1981 }
1982 
1983 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1984 {
1985     return vdev->vq[n].vring.avail;
1986 }
1987 
1988 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1989 {
1990     return vdev->vq[n].vring.used;
1991 }
1992 
1993 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1994 {
1995     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1996 }
1997 
1998 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1999 {
2000     return offsetof(VRingAvail, ring) +
2001         sizeof(uint16_t) * vdev->vq[n].vring.num;
2002 }
2003 
2004 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
2005 {
2006     return offsetof(VRingUsed, ring) +
2007         sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
2008 }
2009 
2010 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
2011 {
2012     return vdev->vq[n].last_avail_idx;
2013 }
2014 
2015 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
2016 {
2017     vdev->vq[n].last_avail_idx = idx;
2018     vdev->vq[n].shadow_avail_idx = idx;
2019 }
2020 
2021 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
2022 {
2023     vdev->vq[n].signalled_used_valid = false;
2024 }
2025 
2026 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2027 {
2028     return vdev->vq + n;
2029 }
2030 
2031 uint16_t virtio_get_queue_index(VirtQueue *vq)
2032 {
2033     return vq->queue_index;
2034 }
2035 
2036 static void virtio_queue_guest_notifier_read(EventNotifier *n)
2037 {
2038     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2039     if (event_notifier_test_and_clear(n)) {
2040         virtio_notify_vector(vq->vdev, vq->vector);
2041     }
2042 }
2043 
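/*
 * Wire up (or tear down) main-loop handling of the guest notifier.  With
 * irqfd the interrupt is injected by the kernel and no handler is needed;
 * without it, QEMU reads the eventfd itself and raises the queue's
 * interrupt vector.  On deassign the notifier is drained once more so a
 * late event is not lost.
 */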
2044 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2045                                                 bool with_irqfd)
2046 {
2047     if (assign && !with_irqfd) {
2048         event_notifier_set_handler(&vq->guest_notifier, false,
2049                                    virtio_queue_guest_notifier_read);
2050     } else {
2051         event_notifier_set_handler(&vq->guest_notifier, false, NULL);
2052     }
2053     if (!assign) {
2054         /* Test and clear notifier before closing it,
2055          * in case poll callback didn't have time to run. */
2056         virtio_queue_guest_notifier_read(&vq->guest_notifier);
2057     }
2058 }
2059 
2060 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2061 {
2062     return &vq->guest_notifier;
2063 }
2064 
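/*
 * ioeventfd handling inside an AioContext (dataplane).  While the
 * AioContext is busy-polling, poll_begin/poll_end suppress and restore
 * guest->host notifications so the guest does not keep kicking the
 * ioeventfd we are about to check directly; the poll callback then
 * processes the ring whenever it is non-empty.
 */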
2065 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2066 {
2067     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2068     if (event_notifier_test_and_clear(n)) {
2069         virtio_queue_notify_aio_vq(vq);
2070     }
2071 }
2072 
2073 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
2074 {
2075     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2076 
2077     virtio_queue_set_notification(vq, 0);
2078 }
2079 
2080 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
2081 {
2082     EventNotifier *n = opaque;
2083     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2084 
2085     if (virtio_queue_empty(vq)) {
2086         return false;
2087     }
2088 
2089     virtio_queue_notify_aio_vq(vq);
2090     return true;
2091 }
2092 
2093 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
2094 {
2095     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2096 
2097     /* Caller polls once more after this to catch requests that race with us */
2098     virtio_queue_set_notification(vq, 1);
2099 }
2100 
2101 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2102                                                 VirtIOHandleOutput handle_output)
2103 {
2104     if (handle_output) {
2105         vq->handle_aio_output = handle_output;
2106         aio_set_event_notifier(ctx, &vq->host_notifier, true,
2107                                virtio_queue_host_notifier_aio_read,
2108                                virtio_queue_host_notifier_aio_poll);
2109         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
2110                                     virtio_queue_host_notifier_aio_poll_begin,
2111                                     virtio_queue_host_notifier_aio_poll_end);
2112     } else {
2113         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
2114         /* Test and clear notifier after disabling the event,
2115          * in case the poll callback didn't have time to run. */
2116         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2117         vq->handle_aio_output = NULL;
2118     }
2119 }
2120 
2121 void virtio_queue_host_notifier_read(EventNotifier *n)
2122 {
2123     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2124     if (event_notifier_test_and_clear(n)) {
2125         virtio_queue_notify_vq(vq);
2126     }
2127 }
2128 
2129 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2130 {
2131     return &vq->host_notifier;
2132 }
2133 
2134 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2135 {
2136     g_free(vdev->bus_name);
2137     vdev->bus_name = g_strdup(bus_name);
2138 }
2139 
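/*
 * Report a fatal guest-visible error (for example a malformed descriptor
 * chain) and mark the device broken so further ring processing is
 * refused.  For VIRTIO 1.0 devices NEEDS_RESET is set and a config
 * interrupt is sent so the guest can notice and reset the device; legacy
 * devices have no such mechanism and simply stop.
 *
 * Illustrative call (the message text is just an example):
 *
 *     virtio_error(vdev, "Desc next is %u, larger than ring size %u",
 *                  next, vq->vring.num);
 */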
2140 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2141 {
2142     va_list ap;
2143 
2144     va_start(ap, fmt);
2145     error_vreport(fmt, ap);
2146     va_end(ap);
2147 
2148     vdev->broken = true;
2149 
2150     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2151         virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2152         virtio_notify_config(vdev);
2153     }
2154 }
2155 
2156 static void virtio_device_realize(DeviceState *dev, Error **errp)
2157 {
2158     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2159     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2160     Error *err = NULL;
2161 
2162     /* Devices should either use vmsd or the load/save methods */
2163     assert(!vdc->vmsd || !vdc->load);
2164 
2165     if (vdc->realize != NULL) {
2166         vdc->realize(dev, &err);
2167         if (err != NULL) {
2168             error_propagate(errp, err);
2169             return;
2170         }
2171     }
2172 
2173     virtio_bus_device_plugged(vdev, &err);
2174     if (err != NULL) {
2175         error_propagate(errp, err);
2176         return;
2177     }
2178 }
2179 
2180 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2181 {
2182     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2183     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2184     Error *err = NULL;
2185 
2186     virtio_bus_device_unplugged(vdev);
2187 
2188     if (vdc->unrealize != NULL) {
2189         vdc->unrealize(dev, &err);
2190         if (err != NULL) {
2191             error_propagate(errp, err);
2192             return;
2193         }
2194     }
2195 
2196     g_free(vdev->bus_name);
2197     vdev->bus_name = NULL;
2198 }
2199 
2200 static Property virtio_properties[] = {
2201     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2202     DEFINE_PROP_END_OF_LIST(),
2203 };
2204 
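/*
 * Default start_ioeventfd implementation: bind every configured queue's
 * host notifier to an ioeventfd serviced from the main loop, then kick
 * each queue once so requests already sitting in the vring get processed.
 * On failure all notifiers assigned so far are unwound.
 */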
2205 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2206 {
2207     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2208     int n, r, err;
2209 
2210     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2211         VirtQueue *vq = &vdev->vq[n];
2212         if (!virtio_queue_get_num(vdev, n)) {
2213             continue;
2214         }
2215         r = virtio_bus_set_host_notifier(qbus, n, true);
2216         if (r < 0) {
2217             err = r;
2218             goto assign_error;
2219         }
2220         event_notifier_set_handler(&vq->host_notifier, true,
2221                                    virtio_queue_host_notifier_read);
2222     }
2223 
2224     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2225         /* Kick right away to begin processing requests already in vring */
2226         VirtQueue *vq = &vdev->vq[n];
2227         if (!vq->vring.num) {
2228             continue;
2229         }
2230         event_notifier_set(&vq->host_notifier);
2231     }
2232     return 0;
2233 
2234 assign_error:
2235     while (--n >= 0) {
2236         VirtQueue *vq = &vdev->vq[n];
2237         if (!virtio_queue_get_num(vdev, n)) {
2238             continue;
2239         }
2240 
2241         event_notifier_set_handler(&vq->host_notifier, true, NULL);
2242         r = virtio_bus_set_host_notifier(qbus, n, false);
2243         assert(r >= 0);
2244     }
2245     return err;
2246 }
2247 
2248 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2249 {
2250     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2251     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2252 
2253     return virtio_bus_start_ioeventfd(vbus);
2254 }
2255 
2256 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2257 {
2258     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2259     int n, r;
2260 
2261     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2262         VirtQueue *vq = &vdev->vq[n];
2263 
2264         if (!virtio_queue_get_num(vdev, n)) {
2265             continue;
2266         }
2267         event_notifier_set_handler(&vq->host_notifier, true, NULL);
2268         r = virtio_bus_set_host_notifier(qbus, n, false);
2269         assert(r >= 0);
2270     }
2271 }
2272 
2273 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2274 {
2275     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2276     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2277 
2278     virtio_bus_stop_ioeventfd(vbus);
2279 }
2280 
2281 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2282 {
2283     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2284     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2285 
2286     return virtio_bus_grab_ioeventfd(vbus);
2287 }
2288 
2289 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2290 {
2291     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2292     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2293 
2294     virtio_bus_release_ioeventfd(vbus);
2295 }
2296 
2297 static void virtio_device_class_init(ObjectClass *klass, void *data)
2298 {
2299     /* Set the default value here. */
2300     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2301     DeviceClass *dc = DEVICE_CLASS(klass);
2302 
2303     dc->realize = virtio_device_realize;
2304     dc->unrealize = virtio_device_unrealize;
2305     dc->bus_type = TYPE_VIRTIO_BUS;
2306     dc->props = virtio_properties;
2307     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2308     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2309 
2310     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2311 }
2312 
2313 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2314 {
2315     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2316     VirtioBusState *vbus = VIRTIO_BUS(qbus);
2317 
2318     return virtio_bus_ioeventfd_enabled(vbus);
2319 }
2320 
2321 static const TypeInfo virtio_device_info = {
2322     .name = TYPE_VIRTIO_DEVICE,
2323     .parent = TYPE_DEVICE,
2324     .instance_size = sizeof(VirtIODevice),
2325     .class_init = virtio_device_class_init,
2326     .abstract = true,
2327     .class_size = sizeof(VirtioDeviceClass),
2328 };
2329 
2330 static void virtio_register_types(void)
2331 {
2332     type_register_static(&virtio_device_info);
2333 }
2334 
2335 type_init(virtio_register_types)
2336