xref: /qemu/hw/virtio/virtio.c (revision dc293f60)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "exec/address-spaces.h"
19 #include "qemu/error-report.h"
20 #include "qemu/log.h"
21 #include "qemu/main-loop.h"
22 #include "qemu/module.h"
23 #include "hw/virtio/virtio.h"
24 #include "migration/qemu-file-types.h"
25 #include "qemu/atomic.h"
26 #include "hw/virtio/virtio-bus.h"
27 #include "hw/qdev-properties.h"
28 #include "hw/virtio/virtio-access.h"
29 #include "sysemu/dma.h"
30 #include "sysemu/runstate.h"
31 #include "standard-headers/linux/virtio_ids.h"
32 
33 /*
34  * The alignment to use between consumer and producer parts of vring.
35  * x86 pagesize again. This is the default, used by transports like PCI
36  * which don't provide a means for the guest to tell the host the alignment.
37  */
38 #define VIRTIO_PCI_VRING_ALIGN         4096
39 
40 typedef struct VRingDesc
41 {
42     uint64_t addr;
43     uint32_t len;
44     uint16_t flags;
45     uint16_t next;
46 } VRingDesc;
47 
48 typedef struct VRingPackedDesc {
49     uint64_t addr;
50     uint32_t len;
51     uint16_t id;
52     uint16_t flags;
53 } VRingPackedDesc;
54 
55 typedef struct VRingAvail
56 {
57     uint16_t flags;
58     uint16_t idx;
59     uint16_t ring[];
60 } VRingAvail;
61 
62 typedef struct VRingUsedElem
63 {
64     uint32_t id;
65     uint32_t len;
66 } VRingUsedElem;
67 
68 typedef struct VRingUsed
69 {
70     uint16_t flags;
71     uint16_t idx;
72     VRingUsedElem ring[];
73 } VRingUsed;
74 
75 typedef struct VRingMemoryRegionCaches {
76     struct rcu_head rcu;
77     MemoryRegionCache desc;
78     MemoryRegionCache avail;
79     MemoryRegionCache used;
80 } VRingMemoryRegionCaches;
81 
82 typedef struct VRing
83 {
84     unsigned int num;
85     unsigned int num_default;
86     unsigned int align;
87     hwaddr desc;
88     hwaddr avail;
89     hwaddr used;
90     VRingMemoryRegionCaches *caches;
91 } VRing;
92 
93 typedef struct VRingPackedDescEvent {
94     uint16_t off_wrap;
95     uint16_t flags;
96 } VRingPackedDescEvent;
97 
98 struct VirtQueue
99 {
100     VRing vring;
101     VirtQueueElement *used_elems;
102 
103     /* Next head to pop */
104     uint16_t last_avail_idx;
105     bool last_avail_wrap_counter;
106 
107     /* Last avail_idx read from VQ. */
108     uint16_t shadow_avail_idx;
109     bool shadow_avail_wrap_counter;
110 
111     uint16_t used_idx;
112     bool used_wrap_counter;
113 
114     /* Last used index value we have signalled on */
115     uint16_t signalled_used;
116 
117     /* Whether the value in signalled_used is valid */
118     bool signalled_used_valid;
119 
120     /* Notification enabled? */
121     bool notification;
122 
123     uint16_t queue_index;
124 
125     unsigned int inuse;
126 
127     uint16_t vector;
128     VirtIOHandleOutput handle_output;
129     VirtIOHandleAIOOutput handle_aio_output;
130     VirtIODevice *vdev;
131     EventNotifier guest_notifier;
132     EventNotifier host_notifier;
133     bool host_notifier_enabled;
134     QLIST_ENTRY(VirtQueue) node;
135 };
136 
137 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
138 {
139     if (!caches) {
140         return;
141     }
142 
143     address_space_cache_destroy(&caches->desc);
144     address_space_cache_destroy(&caches->avail);
145     address_space_cache_destroy(&caches->used);
146     g_free(caches);
147 }
148 
149 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
150 {
151     VRingMemoryRegionCaches *caches;
152 
153     caches = qatomic_read(&vq->vring.caches);
154     qatomic_rcu_set(&vq->vring.caches, NULL);
155     if (caches) {
156         call_rcu(caches, virtio_free_region_cache, rcu);
157     }
158 }
159 
160 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
161 {
162     VirtQueue *vq = &vdev->vq[n];
163     VRingMemoryRegionCaches *old = vq->vring.caches;
164     VRingMemoryRegionCaches *new = NULL;
165     hwaddr addr, size;
166     int64_t len;
167     bool packed;
168 
169 
170     addr = vq->vring.desc;
171     if (!addr) {
172         goto out_no_cache;
173     }
174     new = g_new0(VRingMemoryRegionCaches, 1);
175     size = virtio_queue_get_desc_size(vdev, n);
176     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
178     len = address_space_cache_init(&new->desc, vdev->dma_as,
179                                    addr, size, packed);
180     if (len < size) {
181         virtio_error(vdev, "Cannot map desc");
182         goto err_desc;
183     }
184 
185     size = virtio_queue_get_used_size(vdev, n);
186     len = address_space_cache_init(&new->used, vdev->dma_as,
187                                    vq->vring.used, size, true);
188     if (len < size) {
189         virtio_error(vdev, "Cannot map used");
190         goto err_used;
191     }
192 
193     size = virtio_queue_get_avail_size(vdev, n);
194     len = address_space_cache_init(&new->avail, vdev->dma_as,
195                                    vq->vring.avail, size, false);
196     if (len < size) {
197         virtio_error(vdev, "Cannot map avail");
198         goto err_avail;
199     }
200 
201     qatomic_rcu_set(&vq->vring.caches, new);
202     if (old) {
203         call_rcu(old, virtio_free_region_cache, rcu);
204     }
205     return;
206 
207 err_avail:
208     address_space_cache_destroy(&new->avail);
209 err_used:
210     address_space_cache_destroy(&new->used);
211 err_desc:
212     address_space_cache_destroy(&new->desc);
213 out_no_cache:
214     g_free(new);
215     virtio_virtqueue_reset_region_cache(vq);
216 }
217 
218 /* virt queue functions */
219 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
220 {
221     VRing *vring = &vdev->vq[n].vring;
222 
223     if (!vring->num || !vring->desc || !vring->align) {
224         /* not yet setup -> nothing to do */
225         return;
226     }
227     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
228     vring->used = vring_align(vring->avail +
229                               offsetof(VRingAvail, ring[vring->num]),
230                               vring->align);
231     virtio_init_region_cache(vdev, n);
232 }
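
/*
 * Worked example (illustrative): for a 256-entry split ring with the
 * default 4096-byte alignment, the layout computed above is
 *
 *     desc  at base
 *     avail at base + 256 * sizeof(VRingDesc)            = base + 4096
 *     used  at ALIGN(base + 4096 + 4 + 256 * 2, 4096)    = base + 8192
 *
 * since sizeof(VRingDesc) == 16 and offsetof(VRingAvail, ring[256]) == 516.
 */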
233 
234 /* Called within rcu_read_lock().  */
235 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
236                                   MemoryRegionCache *cache, int i)
237 {
238     address_space_read_cached(cache, i * sizeof(VRingDesc),
239                               desc, sizeof(VRingDesc));
240     virtio_tswap64s(vdev, &desc->addr);
241     virtio_tswap32s(vdev, &desc->len);
242     virtio_tswap16s(vdev, &desc->flags);
243     virtio_tswap16s(vdev, &desc->next);
244 }
245 
246 static void vring_packed_event_read(VirtIODevice *vdev,
247                                     MemoryRegionCache *cache,
248                                     VRingPackedDescEvent *e)
249 {
250     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
251     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
252 
253     address_space_read_cached(cache, off_flags, &e->flags,
254                               sizeof(e->flags));
255     /* Make sure flags is seen before off_wrap */
256     smp_rmb();
257     address_space_read_cached(cache, off_off, &e->off_wrap,
258                               sizeof(e->off_wrap));
259     virtio_tswap16s(vdev, &e->off_wrap);
260     virtio_tswap16s(vdev, &e->flags);
261 }
262 
263 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
264                                         MemoryRegionCache *cache,
265                                         uint16_t off_wrap)
266 {
267     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
268 
269     virtio_tswap16s(vdev, &off_wrap);
270     address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
271     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
272 }
273 
274 static void vring_packed_flags_write(VirtIODevice *vdev,
275                                      MemoryRegionCache *cache, uint16_t flags)
276 {
277     hwaddr off = offsetof(VRingPackedDescEvent, flags);
278 
279     virtio_tswap16s(vdev, &flags);
280     address_space_write_cached(cache, off, &flags, sizeof(flags));
281     address_space_cache_invalidate(cache, off, sizeof(flags));
282 }
283 
284 /* Called within rcu_read_lock().  */
285 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
286 {
287     return qatomic_rcu_read(&vq->vring.caches);
288 }
289 
290 /* Called within rcu_read_lock().  */
291 static inline uint16_t vring_avail_flags(VirtQueue *vq)
292 {
293     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
294     hwaddr pa = offsetof(VRingAvail, flags);
295 
296     if (!caches) {
297         return 0;
298     }
299 
300     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
301 }
302 
303 /* Called within rcu_read_lock().  */
304 static inline uint16_t vring_avail_idx(VirtQueue *vq)
305 {
306     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
307     hwaddr pa = offsetof(VRingAvail, idx);
308 
309     if (!caches) {
310         return 0;
311     }
312 
313     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
314     return vq->shadow_avail_idx;
315 }
316 
317 /* Called within rcu_read_lock().  */
318 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
319 {
320     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
321     hwaddr pa = offsetof(VRingAvail, ring[i]);
322 
323     if (!caches) {
324         return 0;
325     }
326 
327     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
328 }
329 
330 /* Called within rcu_read_lock().  */
331 static inline uint16_t vring_get_used_event(VirtQueue *vq)
332 {
333     return vring_avail_ring(vq, vq->vring.num);
334 }
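
/*
 * Note: with VIRTIO_RING_F_EVENT_IDX the driver stores its used_event
 * value in the slot just past the avail ring, which is why the helper
 * above reads avail->ring[vq->vring.num].
 */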
335 
336 /* Called within rcu_read_lock().  */
337 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
338                                     int i)
339 {
340     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
341     hwaddr pa = offsetof(VRingUsed, ring[i]);
342 
343     if (!caches) {
344         return;
345     }
346 
347     virtio_tswap32s(vq->vdev, &uelem->id);
348     virtio_tswap32s(vq->vdev, &uelem->len);
349     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
350     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
351 }
352 
353 /* Called within rcu_read_lock().  */
354 static uint16_t vring_used_idx(VirtQueue *vq)
355 {
356     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
357     hwaddr pa = offsetof(VRingUsed, idx);
358 
359     if (!caches) {
360         return 0;
361     }
362 
363     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
364 }
365 
366 /* Called within rcu_read_lock().  */
367 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
368 {
369     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
370     hwaddr pa = offsetof(VRingUsed, idx);
371 
372     if (caches) {
373         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
374         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
375     }
376 
377     vq->used_idx = val;
378 }
379 
380 /* Called within rcu_read_lock().  */
381 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
382 {
383     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
384     VirtIODevice *vdev = vq->vdev;
385     hwaddr pa = offsetof(VRingUsed, flags);
386     uint16_t flags;
387 
388     if (!caches) {
389         return;
390     }
391 
392     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
393     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
394     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
395 }
396 
397 /* Called within rcu_read_lock().  */
398 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
399 {
400     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
401     VirtIODevice *vdev = vq->vdev;
402     hwaddr pa = offsetof(VRingUsed, flags);
403     uint16_t flags;
404 
405     if (!caches) {
406         return;
407     }
408 
409     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
410     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
411     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
412 }
413 
414 /* Called within rcu_read_lock().  */
415 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
416 {
417     VRingMemoryRegionCaches *caches;
418     hwaddr pa;
419     if (!vq->notification) {
420         return;
421     }
422 
423     caches = vring_get_region_caches(vq);
424     if (!caches) {
425         return;
426     }
427 
428     pa = offsetof(VRingUsed, ring[vq->vring.num]);
429     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
430     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
431 }
432 
433 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
434 {
435     RCU_READ_LOCK_GUARD();
436 
437     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
438         vring_set_avail_event(vq, vring_avail_idx(vq));
439     } else if (enable) {
440         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
441     } else {
442         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
443     }
444     if (enable) {
445         /* Expose avail event/used flags before caller checks the avail idx. */
446         smp_mb();
447     }
448 }
449 
450 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
451 {
452     uint16_t off_wrap;
453     VRingPackedDescEvent e;
454     VRingMemoryRegionCaches *caches;
455 
456     RCU_READ_LOCK_GUARD();
457     caches = vring_get_region_caches(vq);
458     if (!caches) {
459         return;
460     }
461 
462     vring_packed_event_read(vq->vdev, &caches->used, &e);
463 
464     if (!enable) {
465         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
466     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
467         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
468         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
469         /* Make sure off_wrap is written before flags */
470         smp_wmb();
471         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
472     } else {
473         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
474     }
475 
476     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
477     if (enable) {
478         /* Expose avail event/used flags before caller checks the avail idx. */
479         smp_mb();
480     }
481 }
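
/*
 * Example (illustrative): with shadow_avail_idx == 5 and
 * shadow_avail_wrap_counter == 1, the device publishes
 * off_wrap == 5 | (1 << 15) == 0x8005 together with
 * VRING_PACKED_EVENT_FLAG_DESC, asking the driver to notify it once the
 * descriptor at ring offset 5 of that wrap is made available.
 */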
482 
483 bool virtio_queue_get_notification(VirtQueue *vq)
484 {
485     return vq->notification;
486 }
487 
488 void virtio_queue_set_notification(VirtQueue *vq, int enable)
489 {
490     vq->notification = enable;
491 
492     if (!vq->vring.desc) {
493         return;
494     }
495 
496     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
497         virtio_queue_packed_set_notification(vq, enable);
498     } else {
499         virtio_queue_split_set_notification(vq, enable);
500     }
501 }
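
/*
 * Illustrative usage sketch (not taken from any particular device model):
 * output handlers commonly disable notifications while draining the ring
 * and re-check for new buffers after re-enabling them, relying on the
 * smp_mb() in the helpers above:
 *
 *     VirtQueueElement *elem;
 *
 *     do {
 *         virtio_queue_set_notification(vq, 0);
 *         while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
 *             handle_request(vq, elem); // hypothetical helper: process,
 *                                       // virtqueue_push() and g_free()
 *         }
 *         virtio_queue_set_notification(vq, 1);
 *     } while (!virtio_queue_empty(vq));
 */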
502 
503 int virtio_queue_ready(VirtQueue *vq)
504 {
505     return vq->vring.avail != 0;
506 }
507 
508 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
509                                          uint16_t *flags,
510                                          MemoryRegionCache *cache,
511                                          int i)
512 {
513     address_space_read_cached(cache,
514                               i * sizeof(VRingPackedDesc) +
515                               offsetof(VRingPackedDesc, flags),
516                               flags, sizeof(*flags));
517     virtio_tswap16s(vdev, flags);
518 }
519 
520 static void vring_packed_desc_read(VirtIODevice *vdev,
521                                    VRingPackedDesc *desc,
522                                    MemoryRegionCache *cache,
523                                    int i, bool strict_order)
524 {
525     hwaddr off = i * sizeof(VRingPackedDesc);
526 
527     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
528 
529     if (strict_order) {
530         /* Make sure flags is read before the rest of the fields. */
531         smp_rmb();
532     }
533 
534     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
535                               &desc->addr, sizeof(desc->addr));
536     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
537                               &desc->id, sizeof(desc->id));
538     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
539                               &desc->len, sizeof(desc->len));
540     virtio_tswap64s(vdev, &desc->addr);
541     virtio_tswap16s(vdev, &desc->id);
542     virtio_tswap32s(vdev, &desc->len);
543 }
544 
545 static void vring_packed_desc_write_data(VirtIODevice *vdev,
546                                          VRingPackedDesc *desc,
547                                          MemoryRegionCache *cache,
548                                          int i)
549 {
550     hwaddr off_id = i * sizeof(VRingPackedDesc) +
551                     offsetof(VRingPackedDesc, id);
552     hwaddr off_len = i * sizeof(VRingPackedDesc) +
553                     offsetof(VRingPackedDesc, len);
554 
555     virtio_tswap32s(vdev, &desc->len);
556     virtio_tswap16s(vdev, &desc->id);
557     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
558     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
559     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
560     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
561 }
562 
563 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
564                                           VRingPackedDesc *desc,
565                                           MemoryRegionCache *cache,
566                                           int i)
567 {
568     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
569 
570     virtio_tswap16s(vdev, &desc->flags);
571     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
572     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
573 }
574 
575 static void vring_packed_desc_write(VirtIODevice *vdev,
576                                     VRingPackedDesc *desc,
577                                     MemoryRegionCache *cache,
578                                     int i, bool strict_order)
579 {
580     vring_packed_desc_write_data(vdev, desc, cache, i);
581     if (strict_order) {
582         /* Make sure data is written before flags. */
583         smp_wmb();
584     }
585     vring_packed_desc_write_flags(vdev, desc, cache, i);
586 }
587 
588 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
589 {
590     bool avail, used;
591 
592     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
593     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
594     return (avail != used) && (avail == wrap_counter);
595 }
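
/*
 * Example (illustrative): with the driver's wrap counter at 1, a
 * descriptor whose AVAIL bit is set and whose USED bit is clear is
 * available to the device (avail != used and avail == wrap_counter);
 * once the device marks it used, both bits match again and the
 * descriptor is no longer considered available.
 */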
596 
597 /* Fetch avail_idx from VQ memory only when we really need to know if
598  * the guest has added some buffers.
599  * Called within rcu_read_lock().  */
600 static int virtio_queue_empty_rcu(VirtQueue *vq)
601 {
602     if (virtio_device_disabled(vq->vdev)) {
603         return 1;
604     }
605 
606     if (unlikely(!vq->vring.avail)) {
607         return 1;
608     }
609 
610     if (vq->shadow_avail_idx != vq->last_avail_idx) {
611         return 0;
612     }
613 
614     return vring_avail_idx(vq) == vq->last_avail_idx;
615 }
616 
617 static int virtio_queue_split_empty(VirtQueue *vq)
618 {
619     bool empty;
620 
621     if (virtio_device_disabled(vq->vdev)) {
622         return 1;
623     }
624 
625     if (unlikely(!vq->vring.avail)) {
626         return 1;
627     }
628 
629     if (vq->shadow_avail_idx != vq->last_avail_idx) {
630         return 0;
631     }
632 
633     RCU_READ_LOCK_GUARD();
634     empty = vring_avail_idx(vq) == vq->last_avail_idx;
635     return empty;
636 }
637 
638 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
639 {
640     struct VRingPackedDesc desc;
641     VRingMemoryRegionCaches *cache;
642 
643     if (unlikely(!vq->vring.desc)) {
644         return 1;
645     }
646 
647     cache = vring_get_region_caches(vq);
648     if (!cache) {
649         return 1;
650     }
651 
652     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
653                                  vq->last_avail_idx);
654 
655     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
656 }
657 
658 static int virtio_queue_packed_empty(VirtQueue *vq)
659 {
660     RCU_READ_LOCK_GUARD();
661     return virtio_queue_packed_empty_rcu(vq);
662 }
663 
664 int virtio_queue_empty(VirtQueue *vq)
665 {
666     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
667         return virtio_queue_packed_empty(vq);
668     } else {
669         return virtio_queue_split_empty(vq);
670     }
671 }
672 
673 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
674                                unsigned int len)
675 {
676     AddressSpace *dma_as = vq->vdev->dma_as;
677     unsigned int offset;
678     int i;
679 
680     offset = 0;
681     for (i = 0; i < elem->in_num; i++) {
682         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
683 
684         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
685                          elem->in_sg[i].iov_len,
686                          DMA_DIRECTION_FROM_DEVICE, size);
687 
688         offset += size;
689     }
690 
691     for (i = 0; i < elem->out_num; i++) {
692         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
693                          elem->out_sg[i].iov_len,
694                          DMA_DIRECTION_TO_DEVICE,
695                          elem->out_sg[i].iov_len);
    }
696 }
697 
698 /* virtqueue_detach_element:
699  * @vq: The #VirtQueue
700  * @elem: The #VirtQueueElement
701  * @len: number of bytes written
702  *
703  * Detach the element from the virtqueue.  This function is suitable for device
704  * reset or other situations where a #VirtQueueElement is simply freed and will
705  * not be pushed or discarded.
706  */
707 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
708                               unsigned int len)
709 {
710     vq->inuse -= elem->ndescs;
711     virtqueue_unmap_sg(vq, elem, len);
712 }
713 
714 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
715 {
716     vq->last_avail_idx -= num;
717 }
718 
719 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
720 {
721     if (vq->last_avail_idx < num) {
722         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
723         vq->last_avail_wrap_counter ^= 1;
724     } else {
725         vq->last_avail_idx -= num;
726     }
727 }
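
/*
 * Example (illustrative): rewinding 3 descriptors on a 256-entry packed
 * ring with last_avail_idx == 1 leaves last_avail_idx == 254 and flips
 * last_avail_wrap_counter, because the index wrapped back past the start
 * of the ring.
 */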
728 
729 /* virtqueue_unpop:
730  * @vq: The #VirtQueue
731  * @elem: The #VirtQueueElement
732  * @len: number of bytes written
733  *
734  * Pretend the most recent element wasn't popped from the virtqueue.  The next
735  * call to virtqueue_pop() will refetch the element.
736  */
737 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
738                      unsigned int len)
739 {
740 
741     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
742         virtqueue_packed_rewind(vq, 1);
743     } else {
744         virtqueue_split_rewind(vq, 1);
745     }
746 
747     virtqueue_detach_element(vq, elem, len);
748 }
749 
750 /* virtqueue_rewind:
751  * @vq: The #VirtQueue
752  * @num: Number of elements to push back
753  *
754  * Pretend that elements weren't popped from the virtqueue.  The next
755  * virtqueue_pop() will refetch the oldest element.
756  *
757  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
758  *
759  * Returns: true on success, false if @num is greater than the number of in use
760  * elements.
761  */
762 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
763 {
764     if (num > vq->inuse) {
765         return false;
766     }
767 
768     vq->inuse -= num;
769     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
770         virtqueue_packed_rewind(vq, num);
771     } else {
772         virtqueue_split_rewind(vq, num);
773     }
774     return true;
775 }
776 
777 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
778                     unsigned int len, unsigned int idx)
779 {
780     VRingUsedElem uelem;
781 
782     if (unlikely(!vq->vring.used)) {
783         return;
784     }
785 
786     idx = (idx + vq->used_idx) % vq->vring.num;
787 
788     uelem.id = elem->index;
789     uelem.len = len;
790     vring_used_write(vq, &uelem, idx);
791 }
792 
793 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
794                                   unsigned int len, unsigned int idx)
795 {
796     vq->used_elems[idx].index = elem->index;
797     vq->used_elems[idx].len = len;
798     vq->used_elems[idx].ndescs = elem->ndescs;
799 }
800 
801 static void virtqueue_packed_fill_desc(VirtQueue *vq,
802                                        const VirtQueueElement *elem,
803                                        unsigned int idx,
804                                        bool strict_order)
805 {
806     uint16_t head;
807     VRingMemoryRegionCaches *caches;
808     VRingPackedDesc desc = {
809         .id = elem->index,
810         .len = elem->len,
811     };
812     bool wrap_counter = vq->used_wrap_counter;
813 
814     if (unlikely(!vq->vring.desc)) {
815         return;
816     }
817 
818     head = vq->used_idx + idx;
819     if (head >= vq->vring.num) {
820         head -= vq->vring.num;
821         wrap_counter ^= 1;
822     }
823     if (wrap_counter) {
824         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
825         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
826     } else {
827         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
828         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
829     }
830 
831     caches = vring_get_region_caches(vq);
832     if (!caches) {
833         return;
834     }
835 
836     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
837 }
838 
839 /* Called within rcu_read_lock().  */
840 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
841                     unsigned int len, unsigned int idx)
842 {
843     trace_virtqueue_fill(vq, elem, len, idx);
844 
845     virtqueue_unmap_sg(vq, elem, len);
846 
847     if (virtio_device_disabled(vq->vdev)) {
848         return;
849     }
850 
851     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
852         virtqueue_packed_fill(vq, elem, len, idx);
853     } else {
854         virtqueue_split_fill(vq, elem, len, idx);
855     }
856 }
857 
858 /* Called within rcu_read_lock().  */
859 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
860 {
861     uint16_t old, new;
862 
863     if (unlikely(!vq->vring.used)) {
864         return;
865     }
866 
867     /* Make sure buffer is written before we update index. */
868     smp_wmb();
869     trace_virtqueue_flush(vq, count);
870     old = vq->used_idx;
871     new = old + count;
872     vring_used_idx_set(vq, new);
873     vq->inuse -= count;
874     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
875         vq->signalled_used_valid = false;
    }
876 }
877 
878 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
879 {
880     unsigned int i, ndescs = 0;
881 
882     if (unlikely(!vq->vring.desc)) {
883         return;
884     }
885 
886     for (i = 1; i < count; i++) {
887         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
888         ndescs += vq->used_elems[i].ndescs;
889     }
890     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
891     ndescs += vq->used_elems[0].ndescs;
892 
893     vq->inuse -= ndescs;
894     vq->used_idx += ndescs;
895     if (vq->used_idx >= vq->vring.num) {
896         vq->used_idx -= vq->vring.num;
897         vq->used_wrap_counter ^= 1;
898     }
899 }
900 
901 void virtqueue_flush(VirtQueue *vq, unsigned int count)
902 {
903     if (virtio_device_disabled(vq->vdev)) {
904         vq->inuse -= count;
905         return;
906     }
907 
908     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
909         virtqueue_packed_flush(vq, count);
910     } else {
911         virtqueue_split_flush(vq, count);
912     }
913 }
914 
915 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
916                     unsigned int len)
917 {
918     RCU_READ_LOCK_GUARD();
919     virtqueue_fill(vq, elem, len, 0);
920     virtqueue_flush(vq, 1);
921 }
922 
923 /* Called within rcu_read_lock().  */
924 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
925 {
926     uint16_t num_heads = vring_avail_idx(vq) - idx;
927 
928     /* Check it isn't doing very strange things with descriptor numbers. */
929     if (num_heads > vq->vring.num) {
930         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
931                      idx, vq->shadow_avail_idx);
932         return -EINVAL;
933     }
934     /* On success, callers read a descriptor at vq->last_avail_idx.
935      * Make sure descriptor read does not bypass avail index read. */
936     if (num_heads) {
937         smp_rmb();
938     }
939 
940     return num_heads;
941 }
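
/*
 * Example (illustrative): the avail index is free-running modulo 2^16,
 * so with vring_avail_idx(vq) == 3 and idx == 0xfffe the subtraction
 * above yields 5 pending heads; any result larger than vring.num means
 * the guest published a corrupted index.
 */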
942 
943 /* Called within rcu_read_lock().  */
944 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
945                                unsigned int *head)
946 {
947     /* Grab the next descriptor number they're advertising, and increment
948      * the index we've seen. */
949     *head = vring_avail_ring(vq, idx % vq->vring.num);
950 
951     /* If their number is silly, that's a fatal mistake. */
952     if (*head >= vq->vring.num) {
953         virtio_error(vq->vdev, "Guest says index %u is available", *head);
954         return false;
955     }
956 
957     return true;
958 }
959 
960 enum {
961     VIRTQUEUE_READ_DESC_ERROR = -1,
962     VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
963     VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
964 };
965 
966 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
967                                           MemoryRegionCache *desc_cache,
968                                           unsigned int max, unsigned int *next)
969 {
970     /* If this descriptor says it doesn't chain, we're done. */
971     if (!(desc->flags & VRING_DESC_F_NEXT)) {
972         return VIRTQUEUE_READ_DESC_DONE;
973     }
974 
975     /* Check they're not leading us off end of descriptors. */
976     *next = desc->next;
977     /* Make sure compiler knows to grab that: we don't want it changing! */
978     smp_wmb();
979 
980     if (*next >= max) {
981         virtio_error(vdev, "Desc next is %u", *next);
982         return VIRTQUEUE_READ_DESC_ERROR;
983     }
984 
985     vring_split_desc_read(vdev, desc, desc_cache, *next);
986     return VIRTQUEUE_READ_DESC_MORE;
987 }
988 
989 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
990                             unsigned int *in_bytes, unsigned int *out_bytes,
991                             unsigned max_in_bytes, unsigned max_out_bytes)
992 {
993     VirtIODevice *vdev = vq->vdev;
994     unsigned int max, idx;
995     unsigned int total_bufs, in_total, out_total;
996     VRingMemoryRegionCaches *caches;
997     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
998     int64_t len = 0;
999     int rc;
1000 
1001     RCU_READ_LOCK_GUARD();
1002 
1003     idx = vq->last_avail_idx;
1004     total_bufs = in_total = out_total = 0;
1005 
1006     max = vq->vring.num;
1007     caches = vring_get_region_caches(vq);
1008     if (!caches) {
1009         goto err;
1010     }
1011 
1012     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1013         MemoryRegionCache *desc_cache = &caches->desc;
1014         unsigned int num_bufs;
1015         VRingDesc desc;
1016         unsigned int i;
1017 
1018         num_bufs = total_bufs;
1019 
1020         if (!virtqueue_get_head(vq, idx++, &i)) {
1021             goto err;
1022         }
1023 
1024         vring_split_desc_read(vdev, &desc, desc_cache, i);
1025 
1026         if (desc.flags & VRING_DESC_F_INDIRECT) {
1027             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1028                 virtio_error(vdev, "Invalid size for indirect buffer table");
1029                 goto err;
1030             }
1031 
1032             /* If we've got too many, that implies a descriptor loop. */
1033             if (num_bufs >= max) {
1034                 virtio_error(vdev, "Looped descriptor");
1035                 goto err;
1036             }
1037 
1038             /* loop over the indirect descriptor table */
1039             len = address_space_cache_init(&indirect_desc_cache,
1040                                            vdev->dma_as,
1041                                            desc.addr, desc.len, false);
1042             desc_cache = &indirect_desc_cache;
1043             if (len < desc.len) {
1044                 virtio_error(vdev, "Cannot map indirect buffer");
1045                 goto err;
1046             }
1047 
1048             max = desc.len / sizeof(VRingDesc);
1049             num_bufs = i = 0;
1050             vring_split_desc_read(vdev, &desc, desc_cache, i);
1051         }
1052 
1053         do {
1054             /* If we've got too many, that implies a descriptor loop. */
1055             if (++num_bufs > max) {
1056                 virtio_error(vdev, "Looped descriptor");
1057                 goto err;
1058             }
1059 
1060             if (desc.flags & VRING_DESC_F_WRITE) {
1061                 in_total += desc.len;
1062             } else {
1063                 out_total += desc.len;
1064             }
1065             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1066                 goto done;
1067             }
1068 
1069             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1070         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1071 
1072         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1073             goto err;
1074         }
1075 
1076         if (desc_cache == &indirect_desc_cache) {
1077             address_space_cache_destroy(&indirect_desc_cache);
1078             total_bufs++;
1079         } else {
1080             total_bufs = num_bufs;
1081         }
1082     }
1083 
1084     if (rc < 0) {
1085         goto err;
1086     }
1087 
1088 done:
1089     address_space_cache_destroy(&indirect_desc_cache);
1090     if (in_bytes) {
1091         *in_bytes = in_total;
1092     }
1093     if (out_bytes) {
1094         *out_bytes = out_total;
1095     }
1096     return;
1097 
1098 err:
1099     in_total = out_total = 0;
1100     goto done;
1101 }
1102 
1103 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1104                                            VRingPackedDesc *desc,
1105                                            MemoryRegionCache
1106                                            *desc_cache,
1107                                            unsigned int max,
1108                                            unsigned int *next,
1109                                            bool indirect)
1110 {
1111     /* If this descriptor says it doesn't chain, we're done. */
1112     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1113         return VIRTQUEUE_READ_DESC_DONE;
1114     }
1115 
1116     ++*next;
1117     if (*next == max) {
1118         if (indirect) {
1119             return VIRTQUEUE_READ_DESC_DONE;
1120         } else {
1121             (*next) -= vq->vring.num;
1122         }
1123     }
1124 
1125     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1126     return VIRTQUEUE_READ_DESC_MORE;
1127 }
1128 
1129 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1130                                              unsigned int *in_bytes,
1131                                              unsigned int *out_bytes,
1132                                              unsigned max_in_bytes,
1133                                              unsigned max_out_bytes)
1134 {
1135     VirtIODevice *vdev = vq->vdev;
1136     unsigned int max, idx;
1137     unsigned int total_bufs, in_total, out_total;
1138     MemoryRegionCache *desc_cache;
1139     VRingMemoryRegionCaches *caches;
1140     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1141     int64_t len = 0;
1142     VRingPackedDesc desc;
1143     bool wrap_counter;
1144 
1145     RCU_READ_LOCK_GUARD();
1146     idx = vq->last_avail_idx;
1147     wrap_counter = vq->last_avail_wrap_counter;
1148     total_bufs = in_total = out_total = 0;
1149 
1150     max = vq->vring.num;
1151     caches = vring_get_region_caches(vq);
1152     if (!caches) {
1153         goto err;
1154     }
1155 
1156     for (;;) {
1157         unsigned int num_bufs = total_bufs;
1158         unsigned int i = idx;
1159         int rc;
1160 
1161         desc_cache = &caches->desc;
1162         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1163         if (!is_desc_avail(desc.flags, wrap_counter)) {
1164             break;
1165         }
1166 
1167         if (desc.flags & VRING_DESC_F_INDIRECT) {
1168             if (desc.len % sizeof(VRingPackedDesc)) {
1169                 virtio_error(vdev, "Invalid size for indirect buffer table");
1170                 goto err;
1171             }
1172 
1173             /* If we've got too many, that implies a descriptor loop. */
1174             if (num_bufs >= max) {
1175                 virtio_error(vdev, "Looped descriptor");
1176                 goto err;
1177             }
1178 
1179             /* loop over the indirect descriptor table */
1180             len = address_space_cache_init(&indirect_desc_cache,
1181                                            vdev->dma_as,
1182                                            desc.addr, desc.len, false);
1183             desc_cache = &indirect_desc_cache;
1184             if (len < desc.len) {
1185                 virtio_error(vdev, "Cannot map indirect buffer");
1186                 goto err;
1187             }
1188 
1189             max = desc.len / sizeof(VRingPackedDesc);
1190             num_bufs = i = 0;
1191             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1192         }
1193 
1194         do {
1195             /* If we've got too many, that implies a descriptor loop. */
1196             if (++num_bufs > max) {
1197                 virtio_error(vdev, "Looped descriptor");
1198                 goto err;
1199             }
1200 
1201             if (desc.flags & VRING_DESC_F_WRITE) {
1202                 in_total += desc.len;
1203             } else {
1204                 out_total += desc.len;
1205             }
1206             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1207                 goto done;
1208             }
1209 
1210             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1211                                                  &i, desc_cache ==
1212                                                  &indirect_desc_cache);
1213         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1214 
1215         if (desc_cache == &indirect_desc_cache) {
1216             address_space_cache_destroy(&indirect_desc_cache);
1217             total_bufs++;
1218             idx++;
1219         } else {
1220             idx += num_bufs - total_bufs;
1221             total_bufs = num_bufs;
1222         }
1223 
1224         if (idx >= vq->vring.num) {
1225             idx -= vq->vring.num;
1226             wrap_counter ^= 1;
1227         }
1228     }
1229 
1230     /* Record the index and wrap counter for a kick we want */
1231     vq->shadow_avail_idx = idx;
1232     vq->shadow_avail_wrap_counter = wrap_counter;
1233 done:
1234     address_space_cache_destroy(&indirect_desc_cache);
1235     if (in_bytes) {
1236         *in_bytes = in_total;
1237     }
1238     if (out_bytes) {
1239         *out_bytes = out_total;
1240     }
1241     return;
1242 
1243 err:
1244     in_total = out_total = 0;
1245     goto done;
1246 }
1247 
1248 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1249                                unsigned int *out_bytes,
1250                                unsigned max_in_bytes, unsigned max_out_bytes)
1251 {
1252     uint16_t desc_size;
1253     VRingMemoryRegionCaches *caches;
1254 
1255     if (unlikely(!vq->vring.desc)) {
1256         goto err;
1257     }
1258 
1259     caches = vring_get_region_caches(vq);
1260     if (!caches) {
1261         goto err;
1262     }
1263 
1264     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1265                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1266     if (caches->desc.len < vq->vring.num * desc_size) {
1267         virtio_error(vq->vdev, "Cannot map descriptor ring");
1268         goto err;
1269     }
1270 
1271     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1272         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1273                                          max_in_bytes, max_out_bytes);
1274     } else {
1275         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1276                                         max_in_bytes, max_out_bytes);
1277     }
1278 
1279     return;
1280 err:
1281     if (in_bytes) {
1282         *in_bytes = 0;
1283     }
1284     if (out_bytes) {
1285         *out_bytes = 0;
1286     }
1287 }
1288 
1289 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1290                           unsigned int out_bytes)
1291 {
1292     unsigned int in_total, out_total;
1293 
1294     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1295     return in_bytes <= in_total && out_bytes <= out_total;
1296 }
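
/*
 * Example (illustrative): a device expecting a 16-byte request header
 * from the driver and needing room for a 512-byte response could defer
 * processing until
 *
 *     virtqueue_avail_bytes(vq, 512, 16)
 *
 * returns nonzero (512 device-writable "in" bytes, 16 device-readable
 * "out" bytes).
 */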
1297 
1298 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1299                                hwaddr *addr, struct iovec *iov,
1300                                unsigned int max_num_sg, bool is_write,
1301                                hwaddr pa, size_t sz)
1302 {
1303     bool ok = false;
1304     unsigned num_sg = *p_num_sg;
1305     assert(num_sg <= max_num_sg);
1306 
1307     if (!sz) {
1308         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1309         goto out;
1310     }
1311 
1312     while (sz) {
1313         hwaddr len = sz;
1314 
1315         if (num_sg == max_num_sg) {
1316             virtio_error(vdev, "virtio: too many write descriptors in "
1317                                "indirect table");
1318             goto out;
1319         }
1320 
1321         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1322                                               is_write ?
1323                                               DMA_DIRECTION_FROM_DEVICE :
1324                                               DMA_DIRECTION_TO_DEVICE);
1325         if (!iov[num_sg].iov_base) {
1326             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1327             goto out;
1328         }
1329 
1330         iov[num_sg].iov_len = len;
1331         addr[num_sg] = pa;
1332 
1333         sz -= len;
1334         pa += len;
1335         num_sg++;
1336     }
1337     ok = true;
1338 
1339 out:
1340     *p_num_sg = num_sg;
1341     return ok;
1342 }
1343 
1344 /* Only used by error code paths before we have a VirtQueueElement (therefore
1345  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1346  * yet.
1347  */
1348 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1349                                     struct iovec *iov)
1350 {
1351     unsigned int i;
1352 
1353     for (i = 0; i < out_num + in_num; i++) {
1354         int is_write = i >= out_num;
1355 
1356         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1357         iov++;
1358     }
1359 }
1360 
1361 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1362                                 hwaddr *addr, unsigned int num_sg,
1363                                 bool is_write)
1364 {
1365     unsigned int i;
1366     hwaddr len;
1367 
1368     for (i = 0; i < num_sg; i++) {
1369         len = sg[i].iov_len;
1370         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1371                                         addr[i], &len, is_write ?
1372                                         DMA_DIRECTION_FROM_DEVICE :
1373                                         DMA_DIRECTION_TO_DEVICE);
1374         if (!sg[i].iov_base) {
1375             error_report("virtio: error trying to map MMIO memory");
1376             exit(1);
1377         }
1378         if (len != sg[i].iov_len) {
1379             error_report("virtio: unexpected memory split");
1380             exit(1);
1381         }
1382     }
1383 }
1384 
1385 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1386 {
1387     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1388     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1389                                                                         false);
1390 }
1391 
1392 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1393 {
1394     VirtQueueElement *elem;
1395     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1396     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1397     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1398     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1399     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1400     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1401 
1402     assert(sz >= sizeof(VirtQueueElement));
1403     elem = g_malloc(out_sg_end);
1404     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1405     elem->out_num = out_num;
1406     elem->in_num = in_num;
1407     elem->in_addr = (void *)elem + in_addr_ofs;
1408     elem->out_addr = (void *)elem + out_addr_ofs;
1409     elem->in_sg = (void *)elem + in_sg_ofs;
1410     elem->out_sg = (void *)elem + out_sg_ofs;
1411     return elem;
1412 }
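
/*
 * Resulting allocation layout (sketch), for in_num == I and out_num == O:
 *
 *     [VirtQueueElement, sz bytes]
 *     [in_addr:  I x hwaddr] [out_addr: O x hwaddr]
 *     [in_sg:    I x iovec ] [out_sg:   O x iovec ]
 *
 * with each array aligned as computed above, so a single
 * g_malloc()/g_free() pair covers the element and all of its
 * scatter-gather arrays.
 */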
1413 
1414 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1415 {
1416     unsigned int i, head, max;
1417     VRingMemoryRegionCaches *caches;
1418     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1419     MemoryRegionCache *desc_cache;
1420     int64_t len;
1421     VirtIODevice *vdev = vq->vdev;
1422     VirtQueueElement *elem = NULL;
1423     unsigned out_num, in_num, elem_entries;
1424     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1425     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1426     VRingDesc desc;
1427     int rc;
1428 
1429     RCU_READ_LOCK_GUARD();
1430     if (virtio_queue_empty_rcu(vq)) {
1431         goto done;
1432     }
1433     /* Needed after virtio_queue_empty(), see comment in
1434      * virtqueue_num_heads(). */
1435     smp_rmb();
1436 
1437     /* When we start there are neither input nor output buffers. */
1438     out_num = in_num = elem_entries = 0;
1439 
1440     max = vq->vring.num;
1441 
1442     if (vq->inuse >= vq->vring.num) {
1443         virtio_error(vdev, "Virtqueue size exceeded");
1444         goto done;
1445     }
1446 
1447     if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1448         goto done;
1449     }
1450 
1451     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1452         vring_set_avail_event(vq, vq->last_avail_idx);
1453     }
1454 
1455     i = head;
1456 
1457     caches = vring_get_region_caches(vq);
1458     if (!caches) {
1459         virtio_error(vdev, "Region caches not initialized");
1460         goto done;
1461     }
1462 
1463     if (caches->desc.len < max * sizeof(VRingDesc)) {
1464         virtio_error(vdev, "Cannot map descriptor ring");
1465         goto done;
1466     }
1467 
1468     desc_cache = &caches->desc;
1469     vring_split_desc_read(vdev, &desc, desc_cache, i);
1470     if (desc.flags & VRING_DESC_F_INDIRECT) {
1471         if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1472             virtio_error(vdev, "Invalid size for indirect buffer table");
1473             goto done;
1474         }
1475 
1476         /* loop over the indirect descriptor table */
1477         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1478                                        desc.addr, desc.len, false);
1479         desc_cache = &indirect_desc_cache;
1480         if (len < desc.len) {
1481             virtio_error(vdev, "Cannot map indirect buffer");
1482             goto done;
1483         }
1484 
1485         max = desc.len / sizeof(VRingDesc);
1486         i = 0;
1487         vring_split_desc_read(vdev, &desc, desc_cache, i);
1488     }
1489 
1490     /* Collect all the descriptors */
1491     do {
1492         bool map_ok;
1493 
1494         if (desc.flags & VRING_DESC_F_WRITE) {
1495             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1496                                         iov + out_num,
1497                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1498                                         desc.addr, desc.len);
1499         } else {
1500             if (in_num) {
1501                 virtio_error(vdev, "Incorrect order for descriptors");
1502                 goto err_undo_map;
1503             }
1504             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1505                                         VIRTQUEUE_MAX_SIZE, false,
1506                                         desc.addr, desc.len);
1507         }
1508         if (!map_ok) {
1509             goto err_undo_map;
1510         }
1511 
1512         /* If we've got too many, that implies a descriptor loop. */
1513         if (++elem_entries > max) {
1514             virtio_error(vdev, "Looped descriptor");
1515             goto err_undo_map;
1516         }
1517 
1518         rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1519     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1520 
1521     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1522         goto err_undo_map;
1523     }
1524 
1525     /* Now copy what we have collected and mapped */
1526     elem = virtqueue_alloc_element(sz, out_num, in_num);
1527     elem->index = head;
1528     elem->ndescs = 1;
1529     for (i = 0; i < out_num; i++) {
1530         elem->out_addr[i] = addr[i];
1531         elem->out_sg[i] = iov[i];
1532     }
1533     for (i = 0; i < in_num; i++) {
1534         elem->in_addr[i] = addr[out_num + i];
1535         elem->in_sg[i] = iov[out_num + i];
1536     }
1537 
1538     vq->inuse++;
1539 
1540     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1541 done:
1542     address_space_cache_destroy(&indirect_desc_cache);
1543 
1544     return elem;
1545 
1546 err_undo_map:
1547     virtqueue_undo_map_desc(out_num, in_num, iov);
1548     goto done;
1549 }
1550 
1551 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1552 {
1553     unsigned int i, max;
1554     VRingMemoryRegionCaches *caches;
1555     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1556     MemoryRegionCache *desc_cache;
1557     int64_t len;
1558     VirtIODevice *vdev = vq->vdev;
1559     VirtQueueElement *elem = NULL;
1560     unsigned out_num, in_num, elem_entries;
1561     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1562     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1563     VRingPackedDesc desc;
1564     uint16_t id;
1565     int rc;
1566 
1567     RCU_READ_LOCK_GUARD();
1568     if (virtio_queue_packed_empty_rcu(vq)) {
1569         goto done;
1570     }
1571 
1572     /* When we start there are neither input nor output buffers. */
1573     out_num = in_num = elem_entries = 0;
1574 
1575     max = vq->vring.num;
1576 
1577     if (vq->inuse >= vq->vring.num) {
1578         virtio_error(vdev, "Virtqueue size exceeded");
1579         goto done;
1580     }
1581 
1582     i = vq->last_avail_idx;
1583 
1584     caches = vring_get_region_caches(vq);
1585     if (!caches) {
1586         virtio_error(vdev, "Region caches not initialized");
1587         goto done;
1588     }
1589 
1590     if (caches->desc.len < max * sizeof(VRingDesc)) {
1591         virtio_error(vdev, "Cannot map descriptor ring");
1592         goto done;
1593     }
1594 
1595     desc_cache = &caches->desc;
1596     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1597     id = desc.id;
1598     if (desc.flags & VRING_DESC_F_INDIRECT) {
1599         if (desc.len % sizeof(VRingPackedDesc)) {
1600             virtio_error(vdev, "Invalid size for indirect buffer table");
1601             goto done;
1602         }
1603 
1604         /* loop over the indirect descriptor table */
1605         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1606                                        desc.addr, desc.len, false);
1607         desc_cache = &indirect_desc_cache;
1608         if (len < desc.len) {
1609             virtio_error(vdev, "Cannot map indirect buffer");
1610             goto done;
1611         }
1612 
1613         max = desc.len / sizeof(VRingPackedDesc);
1614         i = 0;
1615         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1616     }
1617 
1618     /* Collect all the descriptors */
1619     do {
1620         bool map_ok;
1621 
1622         if (desc.flags & VRING_DESC_F_WRITE) {
1623             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1624                                         iov + out_num,
1625                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1626                                         desc.addr, desc.len);
1627         } else {
1628             if (in_num) {
1629                 virtio_error(vdev, "Incorrect order for descriptors");
1630                 goto err_undo_map;
1631             }
1632             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1633                                         VIRTQUEUE_MAX_SIZE, false,
1634                                         desc.addr, desc.len);
1635         }
1636         if (!map_ok) {
1637             goto err_undo_map;
1638         }
1639 
1640         /* If we've got too many, that implies a descriptor loop. */
1641         if (++elem_entries > max) {
1642             virtio_error(vdev, "Looped descriptor");
1643             goto err_undo_map;
1644         }
1645 
1646         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1647                                              desc_cache ==
1648                                              &indirect_desc_cache);
1649     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1650 
1651     /* Now copy what we have collected and mapped */
1652     elem = virtqueue_alloc_element(sz, out_num, in_num);
1653     for (i = 0; i < out_num; i++) {
1654         elem->out_addr[i] = addr[i];
1655         elem->out_sg[i] = iov[i];
1656     }
1657     for (i = 0; i < in_num; i++) {
1658         elem->in_addr[i] = addr[out_num + i];
1659         elem->in_sg[i] = iov[out_num + i];
1660     }
1661 
1662     elem->index = id;
1663     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1664     vq->last_avail_idx += elem->ndescs;
1665     vq->inuse += elem->ndescs;
1666 
1667     if (vq->last_avail_idx >= vq->vring.num) {
1668         vq->last_avail_idx -= vq->vring.num;
1669         vq->last_avail_wrap_counter ^= 1;
1670     }
1671 
1672     vq->shadow_avail_idx = vq->last_avail_idx;
1673     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1674 
1675     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1676 done:
1677     address_space_cache_destroy(&indirect_desc_cache);
1678 
1679     return elem;
1680 
1681 err_undo_map:
1682     virtqueue_undo_map_desc(out_num, in_num, iov);
1683     goto done;
1684 }
1685 
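/*
 * Pop the next available buffer from @vq as a VirtQueueElement of
 * @sz bytes, dispatching to the packed or split ring implementation.
 * Returns NULL if the device is disabled or nothing is available.
 */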
1686 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1687 {
1688     if (virtio_device_disabled(vq->vdev)) {
1689         return NULL;
1690     }
1691 
1692     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1693         return virtqueue_packed_pop(vq, sz);
1694     } else {
1695         return virtqueue_split_pop(vq, sz);
1696     }
1697 }
1698 
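/*
 * Walk the packed ring and complete every available buffer with a
 * zero-length used element, without mapping or copying any data.
 * Returns the number of buffers dropped.
 */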
1699 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1700 {
1701     VRingMemoryRegionCaches *caches;
1702     MemoryRegionCache *desc_cache;
1703     unsigned int dropped = 0;
1704     VirtQueueElement elem = {};
1705     VirtIODevice *vdev = vq->vdev;
1706     VRingPackedDesc desc;
1707 
1708     caches = vring_get_region_caches(vq);
1709     if (!caches) {
1710         return 0;
1711     }
1712 
1713     desc_cache = &caches->desc;
1714 
1715     virtio_queue_set_notification(vq, 0);
1716 
1717     while (vq->inuse < vq->vring.num) {
1718         unsigned int idx = vq->last_avail_idx;
1719         /*
1720          * Works similarly to virtqueue_pop, but does not map buffers
1721          * and does not allocate any memory.
1722          */
1723         vring_packed_desc_read(vdev, &desc, desc_cache,
1724                                vq->last_avail_idx, true);
1725         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1726             break;
1727         }
1728         elem.index = desc.id;
1729         elem.ndescs = 1;
1730         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1731                                                vq->vring.num, &idx, false)) {
1732             ++elem.ndescs;
1733         }
1734         /*
1735          * Immediately push the element; there is nothing to unmap
1736          * as both in_num and out_num are set to 0.
1737          */
1738         virtqueue_push(vq, &elem, 0);
1739         dropped++;
1740         vq->last_avail_idx += elem.ndescs;
1741         if (vq->last_avail_idx >= vq->vring.num) {
1742             vq->last_avail_idx -= vq->vring.num;
1743             vq->last_avail_wrap_counter ^= 1;
1744         }
1745     }
1746 
1747     return dropped;
1748 }
1749 
1750 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1751 {
1752     unsigned int dropped = 0;
1753     VirtQueueElement elem = {};
1754     VirtIODevice *vdev = vq->vdev;
1755     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1756 
1757     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1758         /* Works similarly to virtqueue_pop, but does not map buffers
1759          * and does not allocate any memory. */
1760         smp_rmb();
1761         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1762             break;
1763         }
1764         vq->inuse++;
1765         vq->last_avail_idx++;
1766         if (fEventIdx) {
1767             vring_set_avail_event(vq, vq->last_avail_idx);
1768         }
1769         /* Immediately push the element; there is nothing to unmap
1770          * as both in_num and out_num are set to 0. */
1771         virtqueue_push(vq, &elem, 0);
1772         dropped++;
1773     }
1774 
1775     return dropped;
1776 }
1777 
1778 /* virtqueue_drop_all:
1779  * @vq: The #VirtQueue
1780  * Drops all queued buffers and indicates them to the guest
1781  * as if they were completed. Useful when buffers cannot be
1782  * processed but must be returned to the guest.
1783  */
1784 unsigned int virtqueue_drop_all(VirtQueue *vq)
1785 {
1786     struct VirtIODevice *vdev = vq->vdev;
1787 
1788     if (virtio_device_disabled(vq->vdev)) {
1789         return 0;
1790     }
1791 
1792     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1793         return virtqueue_packed_drop_all(vq);
1794     } else {
1795         return virtqueue_split_drop_all(vq);
1796     }
1797 }
1798 
1799 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1800  * it is what QEMU has always done by mistake.  We can change it sooner
1801  * or later by bumping the version number of the affected vm states.
1802  * In the meantime, since the in-memory layout of VirtQueueElement
1803  * has changed, we need to marshal to and from the layout that was
1804  * used before the change.
1805  */
1806 typedef struct VirtQueueElementOld {
1807     unsigned int index;
1808     unsigned int out_num;
1809     unsigned int in_num;
1810     hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1811     hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1812     struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1813     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1814 } VirtQueueElementOld;
1815 
1816 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1817 {
1818     VirtQueueElement *elem;
1819     VirtQueueElementOld data;
1820     int i;
1821 
1822     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1823 
1824     /* TODO: teach all callers that this can fail, and return failure instead
1825      * of asserting here.
1826      * This is just one thing (there are probably more) that must be
1827      * fixed before we can allow NDEBUG compilation.
1828      */
1829     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1830     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1831 
1832     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1833     elem->index = data.index;
1834 
1835     for (i = 0; i < elem->in_num; i++) {
1836         elem->in_addr[i] = data.in_addr[i];
1837     }
1838 
1839     for (i = 0; i < elem->out_num; i++) {
1840         elem->out_addr[i] = data.out_addr[i];
1841     }
1842 
1843     for (i = 0; i < elem->in_num; i++) {
1844         /* Base is overwritten by virtqueue_map.  */
1845         elem->in_sg[i].iov_base = 0;
1846         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1847     }
1848 
1849     for (i = 0; i < elem->out_num; i++) {
1850         /* Base is overwritten by virtqueue_map.  */
1851         elem->out_sg[i].iov_base = 0;
1852         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1853     }
1854 
1855     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1856         qemu_get_be32s(f, &elem->ndescs);
1857     }
1858 
1859     virtqueue_map(vdev, elem);
1860     return elem;
1861 }
1862 
1863 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1864                                 VirtQueueElement *elem)
1865 {
1866     VirtQueueElementOld data;
1867     int i;
1868 
1869     memset(&data, 0, sizeof(data));
1870     data.index = elem->index;
1871     data.in_num = elem->in_num;
1872     data.out_num = elem->out_num;
1873 
1874     for (i = 0; i < elem->in_num; i++) {
1875         data.in_addr[i] = elem->in_addr[i];
1876     }
1877 
1878     for (i = 0; i < elem->out_num; i++) {
1879         data.out_addr[i] = elem->out_addr[i];
1880     }
1881 
1882     for (i = 0; i < elem->in_num; i++) {
1883         /* Base is overwritten by virtqueue_map when loading.  Do not
1884          * save it, as it would leak the QEMU address space layout.  */
1885         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1886     }
1887 
1888     for (i = 0; i < elem->out_num; i++) {
1889         /* Do not save iov_base as above.  */
1890         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1891     }
1892 
1893     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1894         qemu_put_be32s(f, &elem->ndescs);
1895     }
1896 
1897     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1898 }
1899 
1900 /* virtio device */
1901 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1902 {
1903     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1904     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1905 
1906     if (virtio_device_disabled(vdev)) {
1907         return;
1908     }
1909 
1910     if (k->notify) {
1911         k->notify(qbus->parent, vector);
1912     }
1913 }
1914 
1915 void virtio_update_irq(VirtIODevice *vdev)
1916 {
1917     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1918 }
1919 
1920 static int virtio_validate_features(VirtIODevice *vdev)
1921 {
1922     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1923 
1924     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1925         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1926         return -EFAULT;
1927     }
1928 
1929     if (k->validate_features) {
1930         return k->validate_features(vdev);
1931     } else {
1932         return 0;
1933     }
1934 }
1935 
1936 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1937 {
1938     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1939     trace_virtio_set_status(vdev, val);
1940 
1941     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1942         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1943             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1944             int ret = virtio_validate_features(vdev);
1945 
1946             if (ret) {
1947                 return ret;
1948             }
1949         }
1950     }
1951 
1952     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1953         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1954         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1955     }
1956 
1957     if (k->set_status) {
1958         k->set_status(vdev, val);
1959     }
1960     vdev->status = val;
1961 
1962     return 0;
1963 }
1964 
1965 static enum virtio_device_endian virtio_default_endian(void)
1966 {
1967     if (target_words_bigendian()) {
1968         return VIRTIO_DEVICE_ENDIAN_BIG;
1969     } else {
1970         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1971     }
1972 }
1973 
1974 static enum virtio_device_endian virtio_current_cpu_endian(void)
1975 {
1976     CPUClass *cc = CPU_GET_CLASS(current_cpu);
1977 
1978     if (cc->virtio_is_big_endian(current_cpu)) {
1979         return VIRTIO_DEVICE_ENDIAN_BIG;
1980     } else {
1981         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1982     }
1983 }
1984 
1985 void virtio_reset(void *opaque)
1986 {
1987     VirtIODevice *vdev = opaque;
1988     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1989     int i;
1990 
1991     virtio_set_status(vdev, 0);
1992     if (current_cpu) {
1993         /* Guest initiated reset */
1994         vdev->device_endian = virtio_current_cpu_endian();
1995     } else {
1996         /* System reset */
1997         vdev->device_endian = virtio_default_endian();
1998     }
1999 
2000     if (k->reset) {
2001         k->reset(vdev);
2002     }
2003 
2004     vdev->start_on_kick = false;
2005     vdev->started = false;
2006     vdev->broken = false;
2007     vdev->guest_features = 0;
2008     vdev->queue_sel = 0;
2009     vdev->status = 0;
2010     vdev->disabled = false;
2011     qatomic_set(&vdev->isr, 0);
2012     vdev->config_vector = VIRTIO_NO_VECTOR;
2013     virtio_notify_vector(vdev, vdev->config_vector);
2014 
2015     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2016         vdev->vq[i].vring.desc = 0;
2017         vdev->vq[i].vring.avail = 0;
2018         vdev->vq[i].vring.used = 0;
2019         vdev->vq[i].last_avail_idx = 0;
2020         vdev->vq[i].shadow_avail_idx = 0;
2021         vdev->vq[i].used_idx = 0;
2022         vdev->vq[i].last_avail_wrap_counter = true;
2023         vdev->vq[i].shadow_avail_wrap_counter = true;
2024         vdev->vq[i].used_wrap_counter = true;
2025         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2026         vdev->vq[i].signalled_used = 0;
2027         vdev->vq[i].signalled_used_valid = false;
2028         vdev->vq[i].notification = true;
2029         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2030         vdev->vq[i].inuse = 0;
2031         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2032     }
2033 }
2034 
2035 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2036 {
2037     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2038     uint8_t val;
2039 
2040     if (addr + sizeof(val) > vdev->config_len) {
2041         return (uint32_t)-1;
2042     }
2043 
2044     k->get_config(vdev, vdev->config);
2045 
2046     val = ldub_p(vdev->config + addr);
2047     return val;
2048 }
2049 
2050 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2051 {
2052     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2053     uint16_t val;
2054 
2055     if (addr + sizeof(val) > vdev->config_len) {
2056         return (uint32_t)-1;
2057     }
2058 
2059     k->get_config(vdev, vdev->config);
2060 
2061     val = lduw_p(vdev->config + addr);
2062     return val;
2063 }
2064 
2065 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2066 {
2067     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2068     uint32_t val;
2069 
2070     if (addr + sizeof(val) > vdev->config_len) {
2071         return (uint32_t)-1;
2072     }
2073 
2074     k->get_config(vdev, vdev->config);
2075 
2076     val = ldl_p(vdev->config + addr);
2077     return val;
2078 }
2079 
2080 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2081 {
2082     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2083     uint8_t val = data;
2084 
2085     if (addr + sizeof(val) > vdev->config_len) {
2086         return;
2087     }
2088 
2089     stb_p(vdev->config + addr, val);
2090 
2091     if (k->set_config) {
2092         k->set_config(vdev, vdev->config);
2093     }
2094 }
2095 
2096 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2097 {
2098     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2099     uint16_t val = data;
2100 
2101     if (addr + sizeof(val) > vdev->config_len) {
2102         return;
2103     }
2104 
2105     stw_p(vdev->config + addr, val);
2106 
2107     if (k->set_config) {
2108         k->set_config(vdev, vdev->config);
2109     }
2110 }
2111 
2112 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2113 {
2114     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2115     uint32_t val = data;
2116 
2117     if (addr + sizeof(val) > vdev->config_len) {
2118         return;
2119     }
2120 
2121     stl_p(vdev->config + addr, val);
2122 
2123     if (k->set_config) {
2124         k->set_config(vdev, vdev->config);
2125     }
2126 }
2127 
2128 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2129 {
2130     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2131     uint8_t val;
2132 
2133     if (addr + sizeof(val) > vdev->config_len) {
2134         return (uint32_t)-1;
2135     }
2136 
2137     k->get_config(vdev, vdev->config);
2138 
2139     val = ldub_p(vdev->config + addr);
2140     return val;
2141 }
2142 
2143 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2144 {
2145     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2146     uint16_t val;
2147 
2148     if (addr + sizeof(val) > vdev->config_len) {
2149         return (uint32_t)-1;
2150     }
2151 
2152     k->get_config(vdev, vdev->config);
2153 
2154     val = lduw_le_p(vdev->config + addr);
2155     return val;
2156 }
2157 
2158 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2159 {
2160     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2161     uint32_t val;
2162 
2163     if (addr + sizeof(val) > vdev->config_len) {
2164         return (uint32_t)-1;
2165     }
2166 
2167     k->get_config(vdev, vdev->config);
2168 
2169     val = ldl_le_p(vdev->config + addr);
2170     return val;
2171 }
2172 
2173 void virtio_config_modern_writeb(VirtIODevice *vdev,
2174                                  uint32_t addr, uint32_t data)
2175 {
2176     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2177     uint8_t val = data;
2178 
2179     if (addr + sizeof(val) > vdev->config_len) {
2180         return;
2181     }
2182 
2183     stb_p(vdev->config + addr, val);
2184 
2185     if (k->set_config) {
2186         k->set_config(vdev, vdev->config);
2187     }
2188 }
2189 
2190 void virtio_config_modern_writew(VirtIODevice *vdev,
2191                                  uint32_t addr, uint32_t data)
2192 {
2193     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2194     uint16_t val = data;
2195 
2196     if (addr + sizeof(val) > vdev->config_len) {
2197         return;
2198     }
2199 
2200     stw_le_p(vdev->config + addr, val);
2201 
2202     if (k->set_config) {
2203         k->set_config(vdev, vdev->config);
2204     }
2205 }
2206 
2207 void virtio_config_modern_writel(VirtIODevice *vdev,
2208                                  uint32_t addr, uint32_t data)
2209 {
2210     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2211     uint32_t val = data;
2212 
2213     if (addr + sizeof(val) > vdev->config_len) {
2214         return;
2215     }
2216 
2217     stl_le_p(vdev->config + addr, val);
2218 
2219     if (k->set_config) {
2220         k->set_config(vdev, vdev->config);
2221     }
2222 }
2223 
2224 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2225 {
2226     if (!vdev->vq[n].vring.num) {
2227         return;
2228     }
2229     vdev->vq[n].vring.desc = addr;
2230     virtio_queue_update_rings(vdev, n);
2231 }
2232 
2233 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2234 {
2235     return vdev->vq[n].vring.desc;
2236 }
2237 
2238 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2239                             hwaddr avail, hwaddr used)
2240 {
2241     if (!vdev->vq[n].vring.num) {
2242         return;
2243     }
2244     vdev->vq[n].vring.desc = desc;
2245     vdev->vq[n].vring.avail = avail;
2246     vdev->vq[n].vring.used = used;
2247     virtio_init_region_cache(vdev, n);
2248 }
2249 
2250 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2251 {
2252     /* Don't allow guest to flip queue between existent and
2253      * nonexistent states, or to set it to an invalid size.
2254      */
2255     if (!!num != !!vdev->vq[n].vring.num ||
2256         num > VIRTQUEUE_MAX_SIZE ||
2257         num < 0) {
2258         return;
2259     }
2260     vdev->vq[n].vring.num = num;
2261 }
2262 
2263 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2264 {
2265     return QLIST_FIRST(&vdev->vector_queues[vector]);
2266 }
2267 
2268 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2269 {
2270     return QLIST_NEXT(vq, node);
2271 }
2272 
2273 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2274 {
2275     return vdev->vq[n].vring.num;
2276 }
2277 
2278 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2279 {
2280     return vdev->vq[n].vring.num_default;
2281 }
2282 
2283 int virtio_get_num_queues(VirtIODevice *vdev)
2284 {
2285     int i;
2286 
2287     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2288         if (!virtio_queue_get_num(vdev, i)) {
2289             break;
2290         }
2291     }
2292 
2293     return i;
2294 }
2295 
2296 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2297 {
2298     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2299     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2300 
2301     /* virtio-1 compliant devices cannot change the alignment */
2302     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2303         error_report("tried to modify queue alignment for virtio-1 device");
2304         return;
2305     }
2306     /* Check that the transport told us it was going to do this
2307      * (so a buggy transport will immediately assert rather than
2308      * silently failing to migrate this state)
2309      */
2310     assert(k->has_variable_vring_alignment);
2311 
2312     if (align) {
2313         vdev->vq[n].vring.align = align;
2314         virtio_queue_update_rings(vdev, n);
2315     }
2316 }
2317 
2318 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2319 {
2320     bool ret = false;
2321 
2322     if (vq->vring.desc && vq->handle_aio_output) {
2323         VirtIODevice *vdev = vq->vdev;
2324 
2325         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2326         ret = vq->handle_aio_output(vdev, vq);
2327 
2328         if (unlikely(vdev->start_on_kick)) {
2329             virtio_set_started(vdev, true);
2330         }
2331     }
2332 
2333     return ret;
2334 }
2335 
2336 static void virtio_queue_notify_vq(VirtQueue *vq)
2337 {
2338     if (vq->vring.desc && vq->handle_output) {
2339         VirtIODevice *vdev = vq->vdev;
2340 
2341         if (unlikely(vdev->broken)) {
2342             return;
2343         }
2344 
2345         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2346         vq->handle_output(vdev, vq);
2347 
2348         if (unlikely(vdev->start_on_kick)) {
2349             virtio_set_started(vdev, true);
2350         }
2351     }
2352 }
2353 
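/*
 * Guest kick for queue @n.  Ignored if the queue is not set up or the
 * device is broken.  If the host notifier (ioeventfd) is in use, set it
 * so the handler runs in its event loop; otherwise call the handler
 * directly.
 */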
2354 void virtio_queue_notify(VirtIODevice *vdev, int n)
2355 {
2356     VirtQueue *vq = &vdev->vq[n];
2357 
2358     if (unlikely(!vq->vring.desc || vdev->broken)) {
2359         return;
2360     }
2361 
2362     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2363     if (vq->host_notifier_enabled) {
2364         event_notifier_set(&vq->host_notifier);
2365     } else if (vq->handle_output) {
2366         vq->handle_output(vdev, vq);
2367 
2368         if (unlikely(vdev->start_on_kick)) {
2369             virtio_set_started(vdev, true);
2370         }
2371     }
2372 }
2373 
2374 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2375 {
2376     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2377         VIRTIO_NO_VECTOR;
2378 }
2379 
2380 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2381 {
2382     VirtQueue *vq = &vdev->vq[n];
2383 
2384     if (n < VIRTIO_QUEUE_MAX) {
2385         if (vdev->vector_queues &&
2386             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2387             QLIST_REMOVE(vq, node);
2388         }
2389         vdev->vq[n].vector = vector;
2390         if (vdev->vector_queues &&
2391             vector != VIRTIO_NO_VECTOR) {
2392             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2393         }
2394     }
2395 }
2396 
2397 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2398                             VirtIOHandleOutput handle_output)
2399 {
2400     int i;
2401 
2402     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2403         if (vdev->vq[i].vring.num == 0)
2404             break;
2405     }
2406 
2407     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2408         abort();
2409 
2410     vdev->vq[i].vring.num = queue_size;
2411     vdev->vq[i].vring.num_default = queue_size;
2412     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2413     vdev->vq[i].handle_output = handle_output;
2414     vdev->vq[i].handle_aio_output = NULL;
2415     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2416                                        queue_size);
2417 
2418     return &vdev->vq[i];
2419 }
2420 
2421 void virtio_delete_queue(VirtQueue *vq)
2422 {
2423     vq->vring.num = 0;
2424     vq->vring.num_default = 0;
2425     vq->handle_output = NULL;
2426     vq->handle_aio_output = NULL;
2427     g_free(vq->used_elems);
2428     vq->used_elems = NULL;
2429     virtio_virtqueue_reset_region_cache(vq);
2430 }
2431 
2432 void virtio_del_queue(VirtIODevice *vdev, int n)
2433 {
2434     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2435         abort();
2436     }
2437 
2438     virtio_delete_queue(&vdev->vq[n]);
2439 }
2440 
2441 static void virtio_set_isr(VirtIODevice *vdev, int value)
2442 {
2443     uint8_t old = qatomic_read(&vdev->isr);
2444 
2445     /* Do not write ISR if it does not change, so that its cacheline remains
2446      * shared in the common case where the guest does not read it.
2447      */
2448     if ((old & value) != value) {
2449         qatomic_or(&vdev->isr, value);
2450     }
2451 }
2452 
2453 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2454 {
2455     uint16_t old, new;
2456     bool v;
2457     /* We need to expose used array entries before checking used event. */
2458     smp_mb();
2459     /* Always notify when the queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY) */
2460     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2461         !vq->inuse && virtio_queue_empty(vq)) {
2462         return true;
2463     }
2464 
2465     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2466         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2467     }
2468 
2469     v = vq->signalled_used_valid;
2470     vq->signalled_used_valid = true;
2471     old = vq->signalled_used;
2472     new = vq->signalled_used = vq->used_idx;
2473     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2474 }
2475 
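/*
 * Packed ring event suppression: @off_wrap encodes the event offset in
 * bits 0-14 and a wrap counter in bit 15.  If that wrap bit does not
 * match @wrap, the offset belongs to the previous ring wrap, so shift
 * it down by the ring size before the usual vring_need_event() check.
 */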
2476 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2477                                     uint16_t off_wrap, uint16_t new,
2478                                     uint16_t old)
2479 {
2480     int off = off_wrap & ~(1 << 15);
2481 
2482     if (wrap != off_wrap >> 15) {
2483         off -= vq->vring.num;
2484     }
2485 
2486     return vring_need_event(off, new, old);
2487 }
2488 
2489 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2490 {
2491     VRingPackedDescEvent e;
2492     uint16_t old, new;
2493     bool v;
2494     VRingMemoryRegionCaches *caches;
2495 
2496     caches = vring_get_region_caches(vq);
2497     if (!caches) {
2498         return false;
2499     }
2500 
2501     vring_packed_event_read(vdev, &caches->avail, &e);
2502 
2503     old = vq->signalled_used;
2504     new = vq->signalled_used = vq->used_idx;
2505     v = vq->signalled_used_valid;
2506     vq->signalled_used_valid = true;
2507 
2508     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2509         return false;
2510     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2511         return true;
2512     }
2513 
2514     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2515                                          e.off_wrap, new, old);
2516 }
2517 
2518 /* Called within rcu_read_lock().  */
2519 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2520 {
2521     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2522         return virtio_packed_should_notify(vdev, vq);
2523     } else {
2524         return virtio_split_should_notify(vdev, vq);
2525     }
2526 }
2527 
2528 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2529 {
2530     WITH_RCU_READ_LOCK_GUARD() {
2531         if (!virtio_should_notify(vdev, vq)) {
2532             return;
2533         }
2534     }
2535 
2536     trace_virtio_notify_irqfd(vdev, vq);
2537 
2538     /*
2539      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2540      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2541      * incorrectly polling this bit during crashdump and hibernation
2542      * in MSI mode, causing a hang if this bit is never updated.
2543      * Recent releases of Windows do not really shut down, but rather
2544      * log out and hibernate to make the next startup faster.  Hence,
2545      * this manifested as a more serious hang during shutdown with MSI
2546      * enabled.  The next driver release, from 2016, fixed this problem;
2547      * working around it is not a must, but it's easy to do, so let's do
2548      * it here.
2549      *
2550      * Note: it's safe to update ISR from any thread as it was switched
2551      * to an atomic operation.
2552      */
2553     virtio_set_isr(vq->vdev, 0x1);
2554     event_notifier_set(&vq->guest_notifier);
2555 }
2556 
2557 static void virtio_irq(VirtQueue *vq)
2558 {
2559     virtio_set_isr(vq->vdev, 0x1);
2560     virtio_notify_vector(vq->vdev, vq->vector);
2561 }
2562 
2563 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2564 {
2565     WITH_RCU_READ_LOCK_GUARD() {
2566         if (!virtio_should_notify(vdev, vq)) {
2567             return;
2568         }
2569     }
2570 
2571     trace_virtio_notify(vdev, vq);
2572     virtio_irq(vq);
2573 }
2574 
2575 void virtio_notify_config(VirtIODevice *vdev)
2576 {
2577     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2578         return;
2579 
2580     virtio_set_isr(vdev, 0x3);
2581     vdev->generation++;
2582     virtio_notify_vector(vdev, vdev->config_vector);
2583 }
2584 
2585 static bool virtio_device_endian_needed(void *opaque)
2586 {
2587     VirtIODevice *vdev = opaque;
2588 
2589     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2590     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2591         return vdev->device_endian != virtio_default_endian();
2592     }
2593     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2594     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2595 }
2596 
2597 static bool virtio_64bit_features_needed(void *opaque)
2598 {
2599     VirtIODevice *vdev = opaque;
2600 
2601     return (vdev->host_features >> 32) != 0;
2602 }
2603 
2604 static bool virtio_virtqueue_needed(void *opaque)
2605 {
2606     VirtIODevice *vdev = opaque;
2607 
2608     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2609 }
2610 
2611 static bool virtio_packed_virtqueue_needed(void *opaque)
2612 {
2613     VirtIODevice *vdev = opaque;
2614 
2615     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2616 }
2617 
2618 static bool virtio_ringsize_needed(void *opaque)
2619 {
2620     VirtIODevice *vdev = opaque;
2621     int i;
2622 
2623     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2624         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2625             return true;
2626         }
2627     }
2628     return false;
2629 }
2630 
2631 static bool virtio_extra_state_needed(void *opaque)
2632 {
2633     VirtIODevice *vdev = opaque;
2634     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2635     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2636 
2637     return k->has_extra_state &&
2638         k->has_extra_state(qbus->parent);
2639 }
2640 
2641 static bool virtio_broken_needed(void *opaque)
2642 {
2643     VirtIODevice *vdev = opaque;
2644 
2645     return vdev->broken;
2646 }
2647 
2648 static bool virtio_started_needed(void *opaque)
2649 {
2650     VirtIODevice *vdev = opaque;
2651 
2652     return vdev->started;
2653 }
2654 
2655 static bool virtio_disabled_needed(void *opaque)
2656 {
2657     VirtIODevice *vdev = opaque;
2658 
2659     return vdev->disabled;
2660 }
2661 
2662 static const VMStateDescription vmstate_virtqueue = {
2663     .name = "virtqueue_state",
2664     .version_id = 1,
2665     .minimum_version_id = 1,
2666     .fields = (VMStateField[]) {
2667         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2668         VMSTATE_UINT64(vring.used, struct VirtQueue),
2669         VMSTATE_END_OF_LIST()
2670     }
2671 };
2672 
2673 static const VMStateDescription vmstate_packed_virtqueue = {
2674     .name = "packed_virtqueue_state",
2675     .version_id = 1,
2676     .minimum_version_id = 1,
2677     .fields = (VMStateField[]) {
2678         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2679         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2680         VMSTATE_UINT16(used_idx, struct VirtQueue),
2681         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2682         VMSTATE_UINT32(inuse, struct VirtQueue),
2683         VMSTATE_END_OF_LIST()
2684     }
2685 };
2686 
2687 static const VMStateDescription vmstate_virtio_virtqueues = {
2688     .name = "virtio/virtqueues",
2689     .version_id = 1,
2690     .minimum_version_id = 1,
2691     .needed = &virtio_virtqueue_needed,
2692     .fields = (VMStateField[]) {
2693         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2694                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2695         VMSTATE_END_OF_LIST()
2696     }
2697 };
2698 
2699 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2700     .name = "virtio/packed_virtqueues",
2701     .version_id = 1,
2702     .minimum_version_id = 1,
2703     .needed = &virtio_packed_virtqueue_needed,
2704     .fields = (VMStateField[]) {
2705         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2706                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2707         VMSTATE_END_OF_LIST()
2708     }
2709 };
2710 
2711 static const VMStateDescription vmstate_ringsize = {
2712     .name = "ringsize_state",
2713     .version_id = 1,
2714     .minimum_version_id = 1,
2715     .fields = (VMStateField[]) {
2716         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2717         VMSTATE_END_OF_LIST()
2718     }
2719 };
2720 
2721 static const VMStateDescription vmstate_virtio_ringsize = {
2722     .name = "virtio/ringsize",
2723     .version_id = 1,
2724     .minimum_version_id = 1,
2725     .needed = &virtio_ringsize_needed,
2726     .fields = (VMStateField[]) {
2727         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2728                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2729         VMSTATE_END_OF_LIST()
2730     }
2731 };
2732 
2733 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2734                            const VMStateField *field)
2735 {
2736     VirtIODevice *vdev = pv;
2737     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2738     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2739 
2740     if (!k->load_extra_state) {
2741         return -1;
2742     } else {
2743         return k->load_extra_state(qbus->parent, f);
2744     }
2745 }
2746 
2747 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2748                            const VMStateField *field, JSONWriter *vmdesc)
2749 {
2750     VirtIODevice *vdev = pv;
2751     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2752     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2753 
2754     k->save_extra_state(qbus->parent, f);
2755     return 0;
2756 }
2757 
2758 static const VMStateInfo vmstate_info_extra_state = {
2759     .name = "virtqueue_extra_state",
2760     .get = get_extra_state,
2761     .put = put_extra_state,
2762 };
2763 
2764 static const VMStateDescription vmstate_virtio_extra_state = {
2765     .name = "virtio/extra_state",
2766     .version_id = 1,
2767     .minimum_version_id = 1,
2768     .needed = &virtio_extra_state_needed,
2769     .fields = (VMStateField[]) {
2770         {
2771             .name         = "extra_state",
2772             .version_id   = 0,
2773             .field_exists = NULL,
2774             .size         = 0,
2775             .info         = &vmstate_info_extra_state,
2776             .flags        = VMS_SINGLE,
2777             .offset       = 0,
2778         },
2779         VMSTATE_END_OF_LIST()
2780     }
2781 };
2782 
2783 static const VMStateDescription vmstate_virtio_device_endian = {
2784     .name = "virtio/device_endian",
2785     .version_id = 1,
2786     .minimum_version_id = 1,
2787     .needed = &virtio_device_endian_needed,
2788     .fields = (VMStateField[]) {
2789         VMSTATE_UINT8(device_endian, VirtIODevice),
2790         VMSTATE_END_OF_LIST()
2791     }
2792 };
2793 
2794 static const VMStateDescription vmstate_virtio_64bit_features = {
2795     .name = "virtio/64bit_features",
2796     .version_id = 1,
2797     .minimum_version_id = 1,
2798     .needed = &virtio_64bit_features_needed,
2799     .fields = (VMStateField[]) {
2800         VMSTATE_UINT64(guest_features, VirtIODevice),
2801         VMSTATE_END_OF_LIST()
2802     }
2803 };
2804 
2805 static const VMStateDescription vmstate_virtio_broken = {
2806     .name = "virtio/broken",
2807     .version_id = 1,
2808     .minimum_version_id = 1,
2809     .needed = &virtio_broken_needed,
2810     .fields = (VMStateField[]) {
2811         VMSTATE_BOOL(broken, VirtIODevice),
2812         VMSTATE_END_OF_LIST()
2813     }
2814 };
2815 
2816 static const VMStateDescription vmstate_virtio_started = {
2817     .name = "virtio/started",
2818     .version_id = 1,
2819     .minimum_version_id = 1,
2820     .needed = &virtio_started_needed,
2821     .fields = (VMStateField[]) {
2822         VMSTATE_BOOL(started, VirtIODevice),
2823         VMSTATE_END_OF_LIST()
2824     }
2825 };
2826 
2827 static const VMStateDescription vmstate_virtio_disabled = {
2828     .name = "virtio/disabled",
2829     .version_id = 1,
2830     .minimum_version_id = 1,
2831     .needed = &virtio_disabled_needed,
2832     .fields = (VMStateField[]) {
2833         VMSTATE_BOOL(disabled, VirtIODevice),
2834         VMSTATE_END_OF_LIST()
2835     }
2836 };
2837 
2838 static const VMStateDescription vmstate_virtio = {
2839     .name = "virtio",
2840     .version_id = 1,
2841     .minimum_version_id = 1,
2842     .minimum_version_id_old = 1,
2843     .fields = (VMStateField[]) {
2844         VMSTATE_END_OF_LIST()
2845     },
2846     .subsections = (const VMStateDescription*[]) {
2847         &vmstate_virtio_device_endian,
2848         &vmstate_virtio_64bit_features,
2849         &vmstate_virtio_virtqueues,
2850         &vmstate_virtio_ringsize,
2851         &vmstate_virtio_broken,
2852         &vmstate_virtio_extra_state,
2853         &vmstate_virtio_started,
2854         &vmstate_virtio_packed_virtqueues,
2855         &vmstate_virtio_disabled,
2856         NULL
2857     }
2858 };
2859 
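/*
 * Save the common virtio state: transport config, status, ISR,
 * queue_sel, the low 32 feature bits, the config space, per-queue
 * state (size, optional alignment, desc address, last_avail_idx and
 * transport queue state), then the device-specific save hook and vmsd,
 * and finally the "virtio" subsections.
 */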
2860 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2861 {
2862     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2863     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2864     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2865     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2866     int i;
2867 
2868     if (k->save_config) {
2869         k->save_config(qbus->parent, f);
2870     }
2871 
2872     qemu_put_8s(f, &vdev->status);
2873     qemu_put_8s(f, &vdev->isr);
2874     qemu_put_be16s(f, &vdev->queue_sel);
2875     qemu_put_be32s(f, &guest_features_lo);
2876     qemu_put_be32(f, vdev->config_len);
2877     qemu_put_buffer(f, vdev->config, vdev->config_len);
2878 
2879     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2880         if (vdev->vq[i].vring.num == 0)
2881             break;
2882     }
2883 
2884     qemu_put_be32(f, i);
2885 
2886     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2887         if (vdev->vq[i].vring.num == 0)
2888             break;
2889 
2890         qemu_put_be32(f, vdev->vq[i].vring.num);
2891         if (k->has_variable_vring_alignment) {
2892             qemu_put_be32(f, vdev->vq[i].vring.align);
2893         }
2894         /*
2895          * Save the desc address now; the remaining ring addresses are
2896          * saved in subsections for VIRTIO-1 devices.
2897          */
2898         qemu_put_be64(f, vdev->vq[i].vring.desc);
2899         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2900         if (k->save_queue) {
2901             k->save_queue(qbus->parent, i, f);
2902         }
2903     }
2904 
2905     if (vdc->save != NULL) {
2906         vdc->save(vdev, f);
2907     }
2908 
2909     if (vdc->vmsd) {
2910         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2911         if (ret) {
2912             return ret;
2913         }
2914     }
2915 
2916     /* Subsections */
2917     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2918 }
2919 
2920 /* A wrapper for use as a VMState .put function */
2921 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2922                               const VMStateField *field, JSONWriter *vmdesc)
2923 {
2924     return virtio_save(VIRTIO_DEVICE(opaque), f);
2925 }
2926 
2927 /* A wrapper for use as a VMState .get function */
2928 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2929                              const VMStateField *field)
2930 {
2931     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2932     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2933 
2934     return virtio_load(vdev, f, dc->vmsd->version_id);
2935 }
2936 
2937 const VMStateInfo  virtio_vmstate_info = {
2938     .name = "virtio",
2939     .get = virtio_device_get,
2940     .put = virtio_device_put,
2941 };
2942 
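/*
 * Apply @val as the guest feature set without the FEATURES_OK check.
 * Bits the device does not offer are masked out; returns -1 if the
 * driver requested any such bit, 0 otherwise.
 */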
2943 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2944 {
2945     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2946     bool bad = (val & ~(vdev->host_features)) != 0;
2947 
2948     val &= vdev->host_features;
2949     if (k->set_features) {
2950         k->set_features(vdev, val);
2951     }
2952     vdev->guest_features = val;
2953     return bad ? -1 : 0;
2954 }
2955 
2956 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2957 {
2958     int ret;
2959     /*
2960      * The driver must not attempt to set features after feature negotiation
2961      * has finished.
2962      */
2963     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2964         return -EINVAL;
2965     }
2966     ret = virtio_set_features_nocheck(vdev, val);
2967     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2968         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2969         int i;
2970         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2971             if (vdev->vq[i].vring.num != 0) {
2972                 virtio_init_region_cache(vdev, i);
2973             }
2974         }
2975     }
2976     if (!ret) {
2977         if (!virtio_device_started(vdev, vdev->status) &&
2978             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2979             vdev->start_on_kick = true;
2980         }
2981     }
2982     return ret;
2983 }
2984 
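/*
 * Compute the config space size implied by @host_features: the largest
 * 'end' offset among the feature-dependent fields whose feature bit is
 * offered.  @feature_sizes must be terminated by an entry with flags == 0.
 */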
2985 size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
2986                                       uint64_t host_features)
2987 {
2988     size_t config_size = 0;
2989     int i;
2990 
2991     for (i = 0; feature_sizes[i].flags != 0; i++) {
2992         if (host_features & feature_sizes[i].flags) {
2993             config_size = MAX(feature_sizes[i].end, config_size);
2994         }
2995     }
2996 
2997     return config_size;
2998 }
2999 
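/* Counterpart of virtio_save(): load the common virtio device state. */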
3000 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
3001 {
3002     int i, ret;
3003     int32_t config_len;
3004     uint32_t num;
3005     uint32_t features;
3006     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3007     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3008     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
3009 
3010     /*
3011      * We poison the endianness to ensure it does not get used before
3012      * subsections have been loaded.
3013      */
3014     vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3015 
3016     if (k->load_config) {
3017         ret = k->load_config(qbus->parent, f);
3018         if (ret)
3019             return ret;
3020     }
3021 
3022     qemu_get_8s(f, &vdev->status);
3023     qemu_get_8s(f, &vdev->isr);
3024     qemu_get_be16s(f, &vdev->queue_sel);
3025     if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3026         return -1;
3027     }
3028     qemu_get_be32s(f, &features);
3029 
3030     /*
3031      * Temporarily set the low bits of guest_features; the virtio-net load
3032      * code needs them to test for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3033      * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3034      *
3035      * Note: devices should always test host features in the future - don't
3036      * create new dependencies like this.
3037      */
3038     vdev->guest_features = features;
3039 
3040     config_len = qemu_get_be32(f);
3041 
3042     /*
3043      * There are cases where the incoming config can be bigger or smaller
3044      * than what we have; so load what we have space for, and skip
3045      * any excess that's in the stream.
3046      */
3047     qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3048 
3049     while (config_len > vdev->config_len) {
3050         qemu_get_byte(f);
3051         config_len--;
3052     }
3053 
3054     num = qemu_get_be32(f);
3055 
3056     if (num > VIRTIO_QUEUE_MAX) {
3057         error_report("Invalid number of virtqueues: 0x%x", num);
3058         return -1;
3059     }
3060 
3061     for (i = 0; i < num; i++) {
3062         vdev->vq[i].vring.num = qemu_get_be32(f);
3063         if (k->has_variable_vring_alignment) {
3064             vdev->vq[i].vring.align = qemu_get_be32(f);
3065         }
3066         vdev->vq[i].vring.desc = qemu_get_be64(f);
3067         qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3068         vdev->vq[i].signalled_used_valid = false;
3069         vdev->vq[i].notification = true;
3070 
3071         if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3072             error_report("VQ %d address 0x0 "
3073                          "inconsistent with Host index 0x%x",
3074                          i, vdev->vq[i].last_avail_idx);
3075             return -1;
3076         }
3077         if (k->load_queue) {
3078             ret = k->load_queue(qbus->parent, i, f);
3079             if (ret)
3080                 return ret;
3081         }
3082     }
3083 
3084     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3085 
3086     if (vdc->load != NULL) {
3087         ret = vdc->load(vdev, f, version_id);
3088         if (ret) {
3089             return ret;
3090         }
3091     }
3092 
3093     if (vdc->vmsd) {
3094         ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3095         if (ret) {
3096             return ret;
3097         }
3098     }
3099 
3100     /* Subsections */
3101     ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3102     if (ret) {
3103         return ret;
3104     }
3105 
3106     if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3107         vdev->device_endian = virtio_default_endian();
3108     }
3109 
3110     if (virtio_64bit_features_needed(vdev)) {
3111         /*
3112          * Subsection load filled vdev->guest_features.  Run them
3113          * through virtio_set_features to sanity-check them against
3114          * host_features.
3115          */
3116         uint64_t features64 = vdev->guest_features;
3117         if (virtio_set_features_nocheck(vdev, features64) < 0) {
3118             error_report("Features 0x%" PRIx64 " unsupported. "
3119                          "Allowed features: 0x%" PRIx64,
3120                          features64, vdev->host_features);
3121             return -1;
3122         }
3123     } else {
3124         if (virtio_set_features_nocheck(vdev, features) < 0) {
3125             error_report("Features 0x%x unsupported. "
3126                          "Allowed features: 0x%" PRIx64,
3127                          features, vdev->host_features);
3128             return -1;
3129         }
3130     }
3131 
3132     if (!virtio_device_started(vdev, vdev->status) &&
3133         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3134         vdev->start_on_kick = true;
3135     }
3136 
3137     RCU_READ_LOCK_GUARD();
3138     for (i = 0; i < num; i++) {
3139         if (vdev->vq[i].vring.desc) {
3140             uint16_t nheads;
3141 
3142             /*
3143              * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3144              * only the region cache needs to be set up.  Legacy devices need
3145              * to calculate used and avail ring addresses based on the desc
3146              * address.
3147              */
3148             if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3149                 virtio_init_region_cache(vdev, i);
3150             } else {
3151                 virtio_queue_update_rings(vdev, i);
3152             }
3153 
3154             if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3155                 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3156                 vdev->vq[i].shadow_avail_wrap_counter =
3157                                         vdev->vq[i].last_avail_wrap_counter;
3158                 continue;
3159             }
3160 
3161             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3162             /* Check it isn't doing strange things with descriptor numbers. */
3163             if (nheads > vdev->vq[i].vring.num) {
3164                 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3165                              "inconsistent with Host index 0x%x: delta 0x%x",
3166                              i, vdev->vq[i].vring.num,
3167                              vring_avail_idx(&vdev->vq[i]),
3168                              vdev->vq[i].last_avail_idx, nheads);
3169                 vdev->vq[i].used_idx = 0;
3170                 vdev->vq[i].shadow_avail_idx = 0;
3171                 vdev->vq[i].inuse = 0;
3172                 continue;
3173             }
3174             vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3175             vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3176 
3177             /*
3178              * Some devices migrate VirtQueueElements that have been popped
3179              * from the avail ring but not yet returned to the used ring.
3180              * Since max ring size < UINT16_MAX it's safe to use modulo
3181              * UINT16_MAX + 1 subtraction.
3182              */
3183             vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3184                                 vdev->vq[i].used_idx);
3185             if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3186                 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3187                              "used_idx 0x%x",
3188                              i, vdev->vq[i].vring.num,
3189                              vdev->vq[i].last_avail_idx,
3190                              vdev->vq[i].used_idx);
3191                 return -1;
3192             }
3193         }
3194     }
3195 
3196     if (vdc->post_load) {
3197         ret = vdc->post_load(vdev);
3198         if (ret) {
3199             return ret;
3200         }
3201     }
3202 
3203     return 0;
3204 }
3205 
3206 void virtio_cleanup(VirtIODevice *vdev)
3207 {
3208     qemu_del_vm_change_state_handler(vdev->vmstate);
3209 }
3210 
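/*
 * VM run state change handler.  When the backend should run (the VM is
 * running and the device has been started), re-apply the status before
 * notifying the transport; otherwise notify the transport first and
 * re-apply the status afterwards.
 */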
3211 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3212 {
3213     VirtIODevice *vdev = opaque;
3214     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3215     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3216     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3217     vdev->vm_running = running;
3218 
3219     if (backend_run) {
3220         virtio_set_status(vdev, vdev->status);
3221     }
3222 
3223     if (k->vmstate_change) {
3224         k->vmstate_change(qbus->parent, backend_run);
3225     }
3226 
3227     if (!backend_run) {
3228         virtio_set_status(vdev, vdev->status);
3229     }
3230 }
3231 
3232 void virtio_instance_init_common(Object *proxy_obj, void *data,
3233                                  size_t vdev_size, const char *vdev_name)
3234 {
3235     DeviceState *vdev = data;
3236 
3237     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3238                                        vdev_size, vdev_name, &error_abort,
3239                                        NULL);
3240     qdev_alias_all_properties(vdev, proxy_obj);
3241 }
3242 
3243 void virtio_init(VirtIODevice *vdev, const char *name,
3244                  uint16_t device_id, size_t config_size)
3245 {
3246     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3247     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3248     int i;
3249     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3250 
3251     if (nvectors) {
3252         vdev->vector_queues =
3253             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3254     }
3255 
3256     vdev->start_on_kick = false;
3257     vdev->started = false;
3258     vdev->device_id = device_id;
3259     vdev->status = 0;
3260     qatomic_set(&vdev->isr, 0);
3261     vdev->queue_sel = 0;
3262     vdev->config_vector = VIRTIO_NO_VECTOR;
3263     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3264     vdev->vm_running = runstate_is_running();
3265     vdev->broken = false;
3266     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3267         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3268         vdev->vq[i].vdev = vdev;
3269         vdev->vq[i].queue_index = i;
3270         vdev->vq[i].host_notifier_enabled = false;
3271     }
3272 
3273     vdev->name = name;
3274     vdev->config_len = config_size;
3275     if (vdev->config_len) {
3276         vdev->config = g_malloc0(config_size);
3277     } else {
3278         vdev->config = NULL;
3279     }
3280     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3281             virtio_vmstate_change, vdev);
3282     vdev->device_endian = virtio_default_endian();
3283     vdev->use_guest_notifier_mask = true;
3284 }
3285 
3286 /*
3287  * Only devices that existed before the virtio standard was defined
3288  * support legacy mode; this includes devices not specified in the
3289  * standard. All newer devices conform to the virtio standard only.
3290  */
3291 bool virtio_legacy_allowed(VirtIODevice *vdev)
3292 {
3293     switch (vdev->device_id) {
3294     case VIRTIO_ID_NET:
3295     case VIRTIO_ID_BLOCK:
3296     case VIRTIO_ID_CONSOLE:
3297     case VIRTIO_ID_RNG:
3298     case VIRTIO_ID_BALLOON:
3299     case VIRTIO_ID_RPMSG:
3300     case VIRTIO_ID_SCSI:
3301     case VIRTIO_ID_9P:
3302     case VIRTIO_ID_RPROC_SERIAL:
3303     case VIRTIO_ID_CAIF:
3304         return true;
3305     default:
3306         return false;
3307     }
3308 }
3309 
3310 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3311 {
3312     return vdev->disable_legacy_check;
3313 }
3314 
3315 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3316 {
3317     return vdev->vq[n].vring.desc;
3318 }
3319 
3320 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3321 {
3322     return virtio_queue_get_desc_addr(vdev, n) != 0;
3323 }
3324 
3325 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3326 {
3327     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3328     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3329 
3330     if (k->queue_enabled) {
3331         return k->queue_enabled(qbus->parent, n);
3332     }
3333     return virtio_queue_enabled_legacy(vdev, n);
3334 }
3335 
3336 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3337 {
3338     return vdev->vq[n].vring.avail;
3339 }
3340 
3341 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3342 {
3343     return vdev->vq[n].vring.used;
3344 }
3345 
3346 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3347 {
3348     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3349 }
3350 
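/*
 * Size of the driver (avail) area: packed rings only expose the event
 * suppression structure; split rings have the avail header plus one
 * uint16_t per descriptor, plus 2 bytes for used_event when
 * VIRTIO_RING_F_EVENT_IDX was negotiated.
 */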
3351 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3352 {
3353     int s;
3354 
3355     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3356         return sizeof(struct VRingPackedDescEvent);
3357     }
3358 
3359     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3360     return offsetof(VRingAvail, ring) +
3361         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3362 }
3363 
3364 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3365 {
3366     int s;
3367 
3368     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3369         return sizeof(struct VRingPackedDescEvent);
3370     }
3371 
3372     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3373     return offsetof(VRingUsed, ring) +
3374         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3375 }
3376 
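/*
 * For packed rings the "last avail" index is reported as a composite
 * value: bits 0-14 hold last_avail_idx with its wrap counter in bit 15,
 * and the upper 16 bits hold used_idx with its wrap counter in bit 31.
 */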
3377 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3378                                                            int n)
3379 {
3380     unsigned int avail, used;
3381 
3382     avail = vdev->vq[n].last_avail_idx;
3383     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3384 
3385     used = vdev->vq[n].used_idx;
3386     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3387 
3388     return avail | used << 16;
3389 }
3390 
3391 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3392                                                       int n)
3393 {
3394     return vdev->vq[n].last_avail_idx;
3395 }
3396 
3397 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3398 {
3399     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3400         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3401     } else {
3402         return virtio_queue_split_get_last_avail_idx(vdev, n);
3403     }
3404 }
3405 
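/*
 * Unpack the composite index format produced by
 * virtio_queue_packed_get_last_avail_idx().
 */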
3406 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3407                                                    int n, unsigned int idx)
3408 {
3409     struct VirtQueue *vq = &vdev->vq[n];
3410 
3411     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3412     vq->last_avail_wrap_counter =
3413         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3414     idx >>= 16;
3415     vq->used_idx = idx & 0x7fff;
3416     vq->used_wrap_counter = !!(idx & 0x8000);
3417 }
3418 
3419 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3420                                                   int n, unsigned int idx)
3421 {
3422     vdev->vq[n].last_avail_idx = idx;
3423     vdev->vq[n].shadow_avail_idx = idx;
3424 }
3425 
3426 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3427                                      unsigned int idx)
3428 {
3429     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3430         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3431     } else {
3432         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3433     }
3434 }
3435 
3436 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3437                                                        int n)
3438 {
3439     /* We don't have a reference like avail idx in shared memory */
3440     return;
3441 }
3442 
3443 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3444                                                       int n)
3445 {
3446     RCU_READ_LOCK_GUARD();
3447     if (vdev->vq[n].vring.desc) {
3448         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3449         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3450     }
3451 }
3452 
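/*
 * Roll last_avail_idx back to the index the device has already marked
 * used, effectively "un-popping" in-flight buffers.  Packed rings have
 * no such reference in guest memory, so nothing is restored for them.
 */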
3453 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3454 {
3455     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3456         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3457     } else {
3458         virtio_queue_split_restore_last_avail_idx(vdev, n);
3459     }
3460 }
3461 
3462 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3463 {
3464     /* used idx was updated through set_last_avail_idx() */
3465     return;
3466 }
3467 
3468 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3469 {
3470     RCU_READ_LOCK_GUARD();
3471     if (vdev->vq[n].vring.desc) {
3472         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3473     }
3474 }
3475 
3476 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3477 {
3478     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3479         return virtio_queue_packed_update_used_idx(vdev, n);
3480     } else {
3481         return virtio_queue_split_update_used_idx(vdev, n);
3482     }
3483 }
3484 
3485 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3486 {
3487     vdev->vq[n].signalled_used_valid = false;
3488 }
3489 
3490 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3491 {
3492     return vdev->vq + n;
3493 }
3494 
3495 uint16_t virtio_get_queue_index(VirtQueue *vq)
3496 {
3497     return vq->queue_index;
3498 }
3499 
3500 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3501 {
3502     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3503     if (event_notifier_test_and_clear(n)) {
3504         virtio_irq(vq);
3505     }
3506 }
3507 
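/*
 * With irqfd the interrupt is injected directly by the kernel, so no
 * QEMU-side handler is installed; otherwise the guest notifier is
 * serviced by virtio_queue_guest_notifier_read().
 */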
3508 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3509                                                 bool with_irqfd)
3510 {
3511     if (assign && !with_irqfd) {
3512         event_notifier_set_handler(&vq->guest_notifier,
3513                                    virtio_queue_guest_notifier_read);
3514     } else {
3515         event_notifier_set_handler(&vq->guest_notifier, NULL);
3516     }
3517     if (!assign) {
3518         /* Test and clear notifier before closing it,
3519          * in case poll callback didn't have time to run. */
3520         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3521     }
3522 }
3523 
3524 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3525 {
3526     return &vq->guest_notifier;
3527 }
3528 
3529 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3530 {
3531     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3532     if (event_notifier_test_and_clear(n)) {
3533         virtio_queue_notify_aio_vq(vq);
3534     }
3535 }
3536 
3537 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3538 {
3539     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3540 
3541     virtio_queue_set_notification(vq, 0);
3542 }
3543 
3544 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3545 {
3546     EventNotifier *n = opaque;
3547     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3548 
3549     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3550         return false;
3551     }
3552 
3553     return virtio_queue_notify_aio_vq(vq);
3554 }
3555 
3556 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3557 {
3558     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3559 
3560     /* Caller polls once more after this to catch requests that race with us */
3561     virtio_queue_set_notification(vq, 1);
3562 }
3563 
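/*
 * Attach or detach the host notifier to an AioContext.  The poll
 * callbacks suppress guest->host notifications while the context is
 * busy-polling and re-enable them (with one final poll by the caller)
 * afterwards.
 */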
3564 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3565                                                 VirtIOHandleAIOOutput handle_output)
3566 {
3567     if (handle_output) {
3568         vq->handle_aio_output = handle_output;
3569         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3570                                virtio_queue_host_notifier_aio_read,
3571                                virtio_queue_host_notifier_aio_poll);
3572         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3573                                     virtio_queue_host_notifier_aio_poll_begin,
3574                                     virtio_queue_host_notifier_aio_poll_end);
3575     } else {
3576         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3577         /* Test and clear notifier after disabling event,
3578          * in case poll callback didn't have time to run. */
3579         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3580         vq->handle_aio_output = NULL;
3581     }
3582 }
3583 
3584 void virtio_queue_host_notifier_read(EventNotifier *n)
3585 {
3586     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3587     if (event_notifier_test_and_clear(n)) {
3588         virtio_queue_notify_vq(vq);
3589     }
3590 }
3591 
3592 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3593 {
3594     return &vq->host_notifier;
3595 }
3596 
3597 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3598 {
3599     vq->host_notifier_enabled = enabled;
3600 }
3601 
3602 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3603                                       MemoryRegion *mr, bool assign)
3604 {
3605     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3606     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3607 
3608     if (k->set_host_notifier_mr) {
3609         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3610     }
3611 
3612     return -1;
3613 }
3614 
3615 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3616 {
3617     g_free(vdev->bus_name);
3618     vdev->bus_name = g_strdup(bus_name);
3619 }
3620 
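/*
 * Mark the device broken.  VIRTIO 1.0 devices can also tell the guest
 * by setting DEVICE_NEEDS_RESET in the status and raising a
 * configuration change interrupt; legacy devices have no equivalent and
 * simply stop processing the rings.
 */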
3621 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3622 {
3623     va_list ap;
3624 
3625     va_start(ap, fmt);
3626     error_vreport(fmt, ap);
3627     va_end(ap);
3628 
3629     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3630         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3631         virtio_notify_config(vdev);
3632     }
3633 
3634     vdev->broken = true;
3635 }
3636 
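/*
 * The guest memory layout changed: rebuild the cached mappings of every
 * initialized vring.  Queues are set up in order, so the first one with
 * num == 0 ends the scan.
 */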
3637 static void virtio_memory_listener_commit(MemoryListener *listener)
3638 {
3639     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3640     int i;
3641 
3642     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3643         if (vdev->vq[i].vring.num == 0) {
3644             break;
3645         }
3646         virtio_init_region_cache(vdev, i);
3647     }
3648 }
3649 
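/*
 * Realize the subclass first, then plug the device into its transport;
 * the memory listener keeps the vring caches in sync with later
 * address-space changes.
 */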
3650 static void virtio_device_realize(DeviceState *dev, Error **errp)
3651 {
3652     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3653     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3654     Error *err = NULL;
3655 
3656     /* Devices should either use vmsd or the load/save methods */
3657     assert(!vdc->vmsd || !vdc->load);
3658 
3659     if (vdc->realize != NULL) {
3660         vdc->realize(dev, &err);
3661         if (err != NULL) {
3662             error_propagate(errp, err);
3663             return;
3664         }
3665     }
3666 
3667     virtio_bus_device_plugged(vdev, &err);
3668     if (err != NULL) {
3669         error_propagate(errp, err);
3670         vdc->unrealize(dev);
3671         return;
3672     }
3673 
3674     vdev->listener.commit = virtio_memory_listener_commit;
3675     memory_listener_register(&vdev->listener, vdev->dma_as);
3676 }
3677 
3678 static void virtio_device_unrealize(DeviceState *dev)
3679 {
3680     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3681     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3682 
3683     memory_listener_unregister(&vdev->listener);
3684     virtio_bus_device_unplugged(vdev);
3685 
3686     if (vdc->unrealize != NULL) {
3687         vdc->unrealize(dev);
3688     }
3689 
3690     g_free(vdev->bus_name);
3691     vdev->bus_name = NULL;
3692 }
3693 
3694 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3695 {
3696     int i;
3697     if (!vdev->vq) {
3698         return;
3699     }
3700 
3701     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3702         if (vdev->vq[i].vring.num == 0) {
3703             break;
3704         }
3705         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3706     }
3707     g_free(vdev->vq);
3708 }
3709 
3710 static void virtio_device_instance_finalize(Object *obj)
3711 {
3712     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3713 
3714     virtio_device_free_virtqueues(vdev);
3715 
3716     g_free(vdev->config);
3717     g_free(vdev->vector_queues);
3718 }
3719 
3720 static Property virtio_properties[] = {
3721     DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3722     DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3723     DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3724     DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3725                      disable_legacy_check, false),
3726     DEFINE_PROP_END_OF_LIST(),
3727 };
3728 
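/*
 * Assign a host notifier (ioeventfd) to every in-use queue inside one
 * memory region transaction, then kick each queue so requests that were
 * queued before ioeventfd took over are not lost.  On failure the
 * notifiers assigned so far are deassigned within the transaction and
 * cleaned up only after it commits, once the deassignment has actually
 * taken effect.
 */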
3729 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3730 {
3731     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3732     int i, n, r, err;
3733 
3734     memory_region_transaction_begin();
3735     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3736         VirtQueue *vq = &vdev->vq[n];
3737         if (!virtio_queue_get_num(vdev, n)) {
3738             continue;
3739         }
3740         r = virtio_bus_set_host_notifier(qbus, n, true);
3741         if (r < 0) {
3742             err = r;
3743             goto assign_error;
3744         }
3745         event_notifier_set_handler(&vq->host_notifier,
3746                                    virtio_queue_host_notifier_read);
3747     }
3748 
3749     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3750         /* Kick right away to begin processing requests already in vring */
3751         VirtQueue *vq = &vdev->vq[n];
3752         if (!vq->vring.num) {
3753             continue;
3754         }
3755         event_notifier_set(&vq->host_notifier);
3756     }
3757     memory_region_transaction_commit();
3758     return 0;
3759 
3760 assign_error:
3761     i = n; /* save n for a second iteration after transaction is committed. */
3762     while (--n >= 0) {
3763         VirtQueue *vq = &vdev->vq[n];
3764         if (!virtio_queue_get_num(vdev, n)) {
3765             continue;
3766         }
3767 
3768         event_notifier_set_handler(&vq->host_notifier, NULL);
3769         r = virtio_bus_set_host_notifier(qbus, n, false);
3770         assert(r >= 0);
3771     }
3772     memory_region_transaction_commit();
3773 
3774     while (--i >= 0) {
3775         if (!virtio_queue_get_num(vdev, i)) {
3776             continue;
3777         }
3778         virtio_bus_cleanup_host_notifier(qbus, i);
3779     }
3780     return err;
3781 }
3782 
3783 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3784 {
3785     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3786     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3787 
3788     return virtio_bus_start_ioeventfd(vbus);
3789 }
3790 
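/*
 * Mirror image of starting ioeventfd: remove the handlers and deassign
 * every host notifier in one transaction, then clean the notifiers up
 * once the transaction has committed.
 */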
3791 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3792 {
3793     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3794     int n, r;
3795 
3796     memory_region_transaction_begin();
3797     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3798         VirtQueue *vq = &vdev->vq[n];
3799 
3800         if (!virtio_queue_get_num(vdev, n)) {
3801             continue;
3802         }
3803         event_notifier_set_handler(&vq->host_notifier, NULL);
3804         r = virtio_bus_set_host_notifier(qbus, n, false);
3805         assert(r >= 0);
3806     }
3807     memory_region_transaction_commit();
3808 
3809     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3810         if (!virtio_queue_get_num(vdev, n)) {
3811             continue;
3812         }
3813         virtio_bus_cleanup_host_notifier(qbus, n);
3814     }
3815 }
3816 
3817 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3818 {
3819     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3820     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3821 
3822     return virtio_bus_grab_ioeventfd(vbus);
3823 }
3824 
3825 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3826 {
3827     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3828     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3829 
3830     virtio_bus_release_ioeventfd(vbus);
3831 }
3832 
3833 static void virtio_device_class_init(ObjectClass *klass, void *data)
3834 {
3835     /* Set the class defaults here. */
3836     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3837     DeviceClass *dc = DEVICE_CLASS(klass);
3838 
3839     dc->realize = virtio_device_realize;
3840     dc->unrealize = virtio_device_unrealize;
3841     dc->bus_type = TYPE_VIRTIO_BUS;
3842     device_class_set_props(dc, virtio_properties);
3843     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3844     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3845 
3846     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3847 }
3848 
3849 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3850 {
3851     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3852     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3853 
3854     return virtio_bus_ioeventfd_enabled(vbus);
3855 }
3856 
3857 static const TypeInfo virtio_device_info = {
3858     .name = TYPE_VIRTIO_DEVICE,
3859     .parent = TYPE_DEVICE,
3860     .instance_size = sizeof(VirtIODevice),
3861     .class_init = virtio_device_class_init,
3862     .instance_finalize = virtio_device_instance_finalize,
3863     .abstract = true,
3864     .class_size = sizeof(VirtioDeviceClass),
3865 };
3866 
3867 static void virtio_register_types(void)
3868 {
3869     type_register_static(&virtio_device_info);
3870 }
3871 
3872 type_init(virtio_register_types)
3873