xref: /qemu/hw/virtio/virtio.c (revision d0fb9657)
1 /*
2  * Virtio Support
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31 
/*
 * Default alignment between the consumer and producer parts of a vring
 * (the x86 page size).  Transports such as PCI, which give the guest no
 * means of telling the host the alignment, use this value.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
38 
/*
 * One entry of the split-ring descriptor table.  vring_split_desc_read()
 * fills it from guest memory and byte-swaps every field.
 */
typedef struct VRingDesc {
    uint64_t addr;      /* buffer address */
    uint32_t len;       /* buffer length in bytes */
    uint16_t flags;     /* VRING_DESC_F_* bits (NEXT, INDIRECT, ...) */
    uint16_t next;      /* index of the next descriptor when F_NEXT is set */
} VRingDesc;
46 
/*
 * One entry of the packed-ring descriptor area.  The flags word carries the
 * VRING_PACKED_DESC_F_AVAIL / VRING_PACKED_DESC_F_USED bits that
 * is_desc_avail() inspects.
 */
typedef struct VRingPackedDesc {
    uint64_t addr;      /* buffer address */
    uint32_t len;       /* buffer length in bytes */
    uint16_t id;        /* buffer id */
    uint16_t flags;     /* VRING_PACKED_DESC_F_* bits */
} VRingPackedDesc;
53 
/*
 * Header of the split-ring available area followed by the ring itself.
 * The slot just past the last ring entry (ring[vring.num]) is read as the
 * used event by vring_get_used_event().
 */
typedef struct VRingAvail {
    uint16_t flags;
    uint16_t idx;       /* read into shadow_avail_idx by vring_avail_idx() */
    uint16_t ring[];    /* descriptor head numbers */
} VRingAvail;
60 
/* One entry of the split-ring used ring, written by vring_used_write(). */
typedef struct VRingUsedElem {
    uint32_t id;        /* index of the element being returned */
    uint32_t len;       /* length reported for that element */
} VRingUsedElem;
66 
67 typedef struct VRingUsed
68 {
69     uint16_t flags;
70     uint16_t idx;
71     VRingUsedElem ring[];
72 } VRingUsed;
73 
74 typedef struct VRingMemoryRegionCaches {
75     struct rcu_head rcu;
76     MemoryRegionCache desc;
77     MemoryRegionCache avail;
78     MemoryRegionCache used;
79 } VRingMemoryRegionCaches;
80 
81 typedef struct VRing
82 {
83     unsigned int num;
84     unsigned int num_default;
85     unsigned int align;
86     hwaddr desc;
87     hwaddr avail;
88     hwaddr used;
89     VRingMemoryRegionCaches *caches;
90 } VRing;
91 
/*
 * Event-suppression structure of the packed ring, accessed through
 * vring_packed_event_read() and the vring_packed_*_write() helpers.
 */
typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;  /* descriptor offset, wrap counter in bit 15 */
    uint16_t flags;     /* VRING_PACKED_EVENT_FLAG_* value */
} VRingPackedDescEvent;
96 
97 struct VirtQueue
98 {
99     VRing vring;
100     VirtQueueElement *used_elems;
101 
102     /* Next head to pop */
103     uint16_t last_avail_idx;
104     bool last_avail_wrap_counter;
105 
106     /* Last avail_idx read from VQ. */
107     uint16_t shadow_avail_idx;
108     bool shadow_avail_wrap_counter;
109 
110     uint16_t used_idx;
111     bool used_wrap_counter;
112 
113     /* Last used index value we have signalled on */
114     uint16_t signalled_used;
115 
116     /* Last used index value we have signalled on */
117     bool signalled_used_valid;
118 
119     /* Notification enabled? */
120     bool notification;
121 
122     uint16_t queue_index;
123 
124     unsigned int inuse;
125 
126     uint16_t vector;
127     VirtIOHandleOutput handle_output;
128     VirtIOHandleAIOOutput handle_aio_output;
129     VirtIODevice *vdev;
130     EventNotifier guest_notifier;
131     EventNotifier host_notifier;
132     bool host_notifier_enabled;
133     QLIST_ENTRY(VirtQueue) node;
134 };
135 
136 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
137 {
138     if (!caches) {
139         return;
140     }
141 
142     address_space_cache_destroy(&caches->desc);
143     address_space_cache_destroy(&caches->avail);
144     address_space_cache_destroy(&caches->used);
145     g_free(caches);
146 }
147 
148 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
149 {
150     VRingMemoryRegionCaches *caches;
151 
152     caches = qatomic_read(&vq->vring.caches);
153     qatomic_rcu_set(&vq->vring.caches, NULL);
154     if (caches) {
155         call_rcu(caches, virtio_free_region_cache, rcu);
156     }
157 }
158 
159 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
160 {
161     VirtQueue *vq = &vdev->vq[n];
162     VRingMemoryRegionCaches *old = vq->vring.caches;
163     VRingMemoryRegionCaches *new = NULL;
164     hwaddr addr, size;
165     int64_t len;
166     bool packed;
167 
168 
169     addr = vq->vring.desc;
170     if (!addr) {
171         goto out_no_cache;
172     }
173     new = g_new0(VRingMemoryRegionCaches, 1);
174     size = virtio_queue_get_desc_size(vdev, n);
175     packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
176                                    true : false;
177     len = address_space_cache_init(&new->desc, vdev->dma_as,
178                                    addr, size, packed);
179     if (len < size) {
180         virtio_error(vdev, "Cannot map desc");
181         goto err_desc;
182     }
183 
184     size = virtio_queue_get_used_size(vdev, n);
185     len = address_space_cache_init(&new->used, vdev->dma_as,
186                                    vq->vring.used, size, true);
187     if (len < size) {
188         virtio_error(vdev, "Cannot map used");
189         goto err_used;
190     }
191 
192     size = virtio_queue_get_avail_size(vdev, n);
193     len = address_space_cache_init(&new->avail, vdev->dma_as,
194                                    vq->vring.avail, size, false);
195     if (len < size) {
196         virtio_error(vdev, "Cannot map avail");
197         goto err_avail;
198     }
199 
200     qatomic_rcu_set(&vq->vring.caches, new);
201     if (old) {
202         call_rcu(old, virtio_free_region_cache, rcu);
203     }
204     return;
205 
206 err_avail:
207     address_space_cache_destroy(&new->avail);
208 err_used:
209     address_space_cache_destroy(&new->used);
210 err_desc:
211     address_space_cache_destroy(&new->desc);
212 out_no_cache:
213     g_free(new);
214     virtio_virtqueue_reset_region_cache(vq);
215 }
216 
217 /* virt queue functions */
218 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
219 {
220     VRing *vring = &vdev->vq[n].vring;
221 
222     if (!vring->num || !vring->desc || !vring->align) {
223         /* not yet setup -> nothing to do */
224         return;
225     }
226     vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
227     vring->used = vring_align(vring->avail +
228                               offsetof(VRingAvail, ring[vring->num]),
229                               vring->align);
230     virtio_init_region_cache(vdev, n);
231 }
232 
233 /* Called within rcu_read_lock().  */
234 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
235                                   MemoryRegionCache *cache, int i)
236 {
237     address_space_read_cached(cache, i * sizeof(VRingDesc),
238                               desc, sizeof(VRingDesc));
239     virtio_tswap64s(vdev, &desc->addr);
240     virtio_tswap32s(vdev, &desc->len);
241     virtio_tswap16s(vdev, &desc->flags);
242     virtio_tswap16s(vdev, &desc->next);
243 }
244 
245 static void vring_packed_event_read(VirtIODevice *vdev,
246                                     MemoryRegionCache *cache,
247                                     VRingPackedDescEvent *e)
248 {
249     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
250     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
251 
252     address_space_read_cached(cache, off_flags, &e->flags,
253                               sizeof(e->flags));
254     /* Make sure flags is seen before off_wrap */
255     smp_rmb();
256     address_space_read_cached(cache, off_off, &e->off_wrap,
257                               sizeof(e->off_wrap));
258     virtio_tswap16s(vdev, &e->off_wrap);
259     virtio_tswap16s(vdev, &e->flags);
260 }
261 
262 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
263                                         MemoryRegionCache *cache,
264                                         uint16_t off_wrap)
265 {
266     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
267 
268     virtio_tswap16s(vdev, &off_wrap);
269     address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
270     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
271 }
272 
273 static void vring_packed_flags_write(VirtIODevice *vdev,
274                                      MemoryRegionCache *cache, uint16_t flags)
275 {
276     hwaddr off = offsetof(VRingPackedDescEvent, flags);
277 
278     virtio_tswap16s(vdev, &flags);
279     address_space_write_cached(cache, off, &flags, sizeof(flags));
280     address_space_cache_invalidate(cache, off, sizeof(flags));
281 }
282 
283 /* Called within rcu_read_lock().  */
284 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
285 {
286     return qatomic_rcu_read(&vq->vring.caches);
287 }
288 
289 /* Called within rcu_read_lock().  */
290 static inline uint16_t vring_avail_flags(VirtQueue *vq)
291 {
292     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
293     hwaddr pa = offsetof(VRingAvail, flags);
294 
295     if (!caches) {
296         return 0;
297     }
298 
299     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
300 }
301 
302 /* Called within rcu_read_lock().  */
303 static inline uint16_t vring_avail_idx(VirtQueue *vq)
304 {
305     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
306     hwaddr pa = offsetof(VRingAvail, idx);
307 
308     if (!caches) {
309         return 0;
310     }
311 
312     vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
313     return vq->shadow_avail_idx;
314 }
315 
316 /* Called within rcu_read_lock().  */
317 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
318 {
319     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
320     hwaddr pa = offsetof(VRingAvail, ring[i]);
321 
322     if (!caches) {
323         return 0;
324     }
325 
326     return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
327 }
328 
329 /* Called within rcu_read_lock().  */
330 static inline uint16_t vring_get_used_event(VirtQueue *vq)
331 {
332     return vring_avail_ring(vq, vq->vring.num);
333 }
334 
335 /* Called within rcu_read_lock().  */
336 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
337                                     int i)
338 {
339     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
340     hwaddr pa = offsetof(VRingUsed, ring[i]);
341 
342     if (!caches) {
343         return;
344     }
345 
346     virtio_tswap32s(vq->vdev, &uelem->id);
347     virtio_tswap32s(vq->vdev, &uelem->len);
348     address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
349     address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
350 }
351 
352 /* Called within rcu_read_lock().  */
353 static uint16_t vring_used_idx(VirtQueue *vq)
354 {
355     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
356     hwaddr pa = offsetof(VRingUsed, idx);
357 
358     if (!caches) {
359         return 0;
360     }
361 
362     return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
363 }
364 
365 /* Called within rcu_read_lock().  */
366 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
367 {
368     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
369     hwaddr pa = offsetof(VRingUsed, idx);
370 
371     if (caches) {
372         virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
373         address_space_cache_invalidate(&caches->used, pa, sizeof(val));
374     }
375 
376     vq->used_idx = val;
377 }
378 
379 /* Called within rcu_read_lock().  */
380 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
381 {
382     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
383     VirtIODevice *vdev = vq->vdev;
384     hwaddr pa = offsetof(VRingUsed, flags);
385     uint16_t flags;
386 
387     if (!caches) {
388         return;
389     }
390 
391     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
392     virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
393     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
394 }
395 
396 /* Called within rcu_read_lock().  */
397 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
398 {
399     VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
400     VirtIODevice *vdev = vq->vdev;
401     hwaddr pa = offsetof(VRingUsed, flags);
402     uint16_t flags;
403 
404     if (!caches) {
405         return;
406     }
407 
408     flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
409     virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
410     address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
411 }
412 
413 /* Called within rcu_read_lock().  */
414 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
415 {
416     VRingMemoryRegionCaches *caches;
417     hwaddr pa;
418     if (!vq->notification) {
419         return;
420     }
421 
422     caches = vring_get_region_caches(vq);
423     if (!caches) {
424         return;
425     }
426 
427     pa = offsetof(VRingUsed, ring[vq->vring.num]);
428     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
429     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
430 }
431 
432 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
433 {
434     RCU_READ_LOCK_GUARD();
435 
436     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
437         vring_set_avail_event(vq, vring_avail_idx(vq));
438     } else if (enable) {
439         vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
440     } else {
441         vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
442     }
443     if (enable) {
444         /* Expose avail event/used flags before caller checks the avail idx. */
445         smp_mb();
446     }
447 }
448 
449 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
450 {
451     uint16_t off_wrap;
452     VRingPackedDescEvent e;
453     VRingMemoryRegionCaches *caches;
454 
455     RCU_READ_LOCK_GUARD();
456     caches = vring_get_region_caches(vq);
457     if (!caches) {
458         return;
459     }
460 
461     vring_packed_event_read(vq->vdev, &caches->used, &e);
462 
463     if (!enable) {
464         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
465     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
466         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
467         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
468         /* Make sure off_wrap is wrote before flags */
469         smp_wmb();
470         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
471     } else {
472         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
473     }
474 
475     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
476     if (enable) {
477         /* Expose avail event/used flags before caller checks the avail idx. */
478         smp_mb();
479     }
480 }
481 
482 bool virtio_queue_get_notification(VirtQueue *vq)
483 {
484     return vq->notification;
485 }
486 
487 void virtio_queue_set_notification(VirtQueue *vq, int enable)
488 {
489     vq->notification = enable;
490 
491     if (!vq->vring.desc) {
492         return;
493     }
494 
495     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
496         virtio_queue_packed_set_notification(vq, enable);
497     } else {
498         virtio_queue_split_set_notification(vq, enable);
499     }
500 }
501 
502 int virtio_queue_ready(VirtQueue *vq)
503 {
504     return vq->vring.avail != 0;
505 }
506 
507 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
508                                          uint16_t *flags,
509                                          MemoryRegionCache *cache,
510                                          int i)
511 {
512     address_space_read_cached(cache,
513                               i * sizeof(VRingPackedDesc) +
514                               offsetof(VRingPackedDesc, flags),
515                               flags, sizeof(*flags));
516     virtio_tswap16s(vdev, flags);
517 }
518 
519 static void vring_packed_desc_read(VirtIODevice *vdev,
520                                    VRingPackedDesc *desc,
521                                    MemoryRegionCache *cache,
522                                    int i, bool strict_order)
523 {
524     hwaddr off = i * sizeof(VRingPackedDesc);
525 
526     vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
527 
528     if (strict_order) {
529         /* Make sure flags is read before the rest fields. */
530         smp_rmb();
531     }
532 
533     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
534                               &desc->addr, sizeof(desc->addr));
535     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
536                               &desc->id, sizeof(desc->id));
537     address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
538                               &desc->len, sizeof(desc->len));
539     virtio_tswap64s(vdev, &desc->addr);
540     virtio_tswap16s(vdev, &desc->id);
541     virtio_tswap32s(vdev, &desc->len);
542 }
543 
544 static void vring_packed_desc_write_data(VirtIODevice *vdev,
545                                          VRingPackedDesc *desc,
546                                          MemoryRegionCache *cache,
547                                          int i)
548 {
549     hwaddr off_id = i * sizeof(VRingPackedDesc) +
550                     offsetof(VRingPackedDesc, id);
551     hwaddr off_len = i * sizeof(VRingPackedDesc) +
552                     offsetof(VRingPackedDesc, len);
553 
554     virtio_tswap32s(vdev, &desc->len);
555     virtio_tswap16s(vdev, &desc->id);
556     address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
557     address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
558     address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
559     address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
560 }
561 
562 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
563                                           VRingPackedDesc *desc,
564                                           MemoryRegionCache *cache,
565                                           int i)
566 {
567     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
568 
569     virtio_tswap16s(vdev, &desc->flags);
570     address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
571     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
572 }
573 
574 static void vring_packed_desc_write(VirtIODevice *vdev,
575                                     VRingPackedDesc *desc,
576                                     MemoryRegionCache *cache,
577                                     int i, bool strict_order)
578 {
579     vring_packed_desc_write_data(vdev, desc, cache, i);
580     if (strict_order) {
581         /* Make sure data is wrote before flags. */
582         smp_wmb();
583     }
584     vring_packed_desc_write_flags(vdev, desc, cache, i);
585 }
586 
587 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
588 {
589     bool avail, used;
590 
591     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
592     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
593     return (avail != used) && (avail == wrap_counter);
594 }
595 
596 /* Fetch avail_idx from VQ memory only when we really need to know if
597  * guest has added some buffers.
598  * Called within rcu_read_lock().  */
599 static int virtio_queue_empty_rcu(VirtQueue *vq)
600 {
601     if (virtio_device_disabled(vq->vdev)) {
602         return 1;
603     }
604 
605     if (unlikely(!vq->vring.avail)) {
606         return 1;
607     }
608 
609     if (vq->shadow_avail_idx != vq->last_avail_idx) {
610         return 0;
611     }
612 
613     return vring_avail_idx(vq) == vq->last_avail_idx;
614 }
615 
616 static int virtio_queue_split_empty(VirtQueue *vq)
617 {
618     bool empty;
619 
620     if (virtio_device_disabled(vq->vdev)) {
621         return 1;
622     }
623 
624     if (unlikely(!vq->vring.avail)) {
625         return 1;
626     }
627 
628     if (vq->shadow_avail_idx != vq->last_avail_idx) {
629         return 0;
630     }
631 
632     RCU_READ_LOCK_GUARD();
633     empty = vring_avail_idx(vq) == vq->last_avail_idx;
634     return empty;
635 }
636 
637 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
638 {
639     struct VRingPackedDesc desc;
640     VRingMemoryRegionCaches *cache;
641 
642     if (unlikely(!vq->vring.desc)) {
643         return 1;
644     }
645 
646     cache = vring_get_region_caches(vq);
647     if (!cache) {
648         return 1;
649     }
650 
651     vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
652                                  vq->last_avail_idx);
653 
654     return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
655 }
656 
657 static int virtio_queue_packed_empty(VirtQueue *vq)
658 {
659     RCU_READ_LOCK_GUARD();
660     return virtio_queue_packed_empty_rcu(vq);
661 }
662 
663 int virtio_queue_empty(VirtQueue *vq)
664 {
665     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
666         return virtio_queue_packed_empty(vq);
667     } else {
668         return virtio_queue_split_empty(vq);
669     }
670 }
671 
672 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
673                                unsigned int len)
674 {
675     AddressSpace *dma_as = vq->vdev->dma_as;
676     unsigned int offset;
677     int i;
678 
679     offset = 0;
680     for (i = 0; i < elem->in_num; i++) {
681         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
682 
683         dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
684                          elem->in_sg[i].iov_len,
685                          DMA_DIRECTION_FROM_DEVICE, size);
686 
687         offset += size;
688     }
689 
690     for (i = 0; i < elem->out_num; i++)
691         dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
692                          elem->out_sg[i].iov_len,
693                          DMA_DIRECTION_TO_DEVICE,
694                          elem->out_sg[i].iov_len);
695 }
696 
697 /* virtqueue_detach_element:
698  * @vq: The #VirtQueue
699  * @elem: The #VirtQueueElement
700  * @len: number of bytes written
701  *
702  * Detach the element from the virtqueue.  This function is suitable for device
703  * reset or other situations where a #VirtQueueElement is simply freed and will
704  * not be pushed or discarded.
705  */
706 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
707                               unsigned int len)
708 {
709     vq->inuse -= elem->ndescs;
710     virtqueue_unmap_sg(vq, elem, len);
711 }
712 
713 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
714 {
715     vq->last_avail_idx -= num;
716 }
717 
718 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
719 {
720     if (vq->last_avail_idx < num) {
721         vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
722         vq->last_avail_wrap_counter ^= 1;
723     } else {
724         vq->last_avail_idx -= num;
725     }
726 }
727 
728 /* virtqueue_unpop:
729  * @vq: The #VirtQueue
730  * @elem: The #VirtQueueElement
731  * @len: number of bytes written
732  *
733  * Pretend the most recent element wasn't popped from the virtqueue.  The next
734  * call to virtqueue_pop() will refetch the element.
735  */
736 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
737                      unsigned int len)
738 {
739 
740     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
741         virtqueue_packed_rewind(vq, 1);
742     } else {
743         virtqueue_split_rewind(vq, 1);
744     }
745 
746     virtqueue_detach_element(vq, elem, len);
747 }
748 
749 /* virtqueue_rewind:
750  * @vq: The #VirtQueue
751  * @num: Number of elements to push back
752  *
753  * Pretend that elements weren't popped from the virtqueue.  The next
754  * virtqueue_pop() will refetch the oldest element.
755  *
756  * Use virtqueue_unpop() instead if you have a VirtQueueElement.
757  *
758  * Returns: true on success, false if @num is greater than the number of in use
759  * elements.
760  */
761 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
762 {
763     if (num > vq->inuse) {
764         return false;
765     }
766 
767     vq->inuse -= num;
768     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
769         virtqueue_packed_rewind(vq, num);
770     } else {
771         virtqueue_split_rewind(vq, num);
772     }
773     return true;
774 }
775 
776 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
777                     unsigned int len, unsigned int idx)
778 {
779     VRingUsedElem uelem;
780 
781     if (unlikely(!vq->vring.used)) {
782         return;
783     }
784 
785     idx = (idx + vq->used_idx) % vq->vring.num;
786 
787     uelem.id = elem->index;
788     uelem.len = len;
789     vring_used_write(vq, &uelem, idx);
790 }
791 
792 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
793                                   unsigned int len, unsigned int idx)
794 {
795     vq->used_elems[idx].index = elem->index;
796     vq->used_elems[idx].len = len;
797     vq->used_elems[idx].ndescs = elem->ndescs;
798 }
799 
800 static void virtqueue_packed_fill_desc(VirtQueue *vq,
801                                        const VirtQueueElement *elem,
802                                        unsigned int idx,
803                                        bool strict_order)
804 {
805     uint16_t head;
806     VRingMemoryRegionCaches *caches;
807     VRingPackedDesc desc = {
808         .id = elem->index,
809         .len = elem->len,
810     };
811     bool wrap_counter = vq->used_wrap_counter;
812 
813     if (unlikely(!vq->vring.desc)) {
814         return;
815     }
816 
817     head = vq->used_idx + idx;
818     if (head >= vq->vring.num) {
819         head -= vq->vring.num;
820         wrap_counter ^= 1;
821     }
822     if (wrap_counter) {
823         desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
824         desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
825     } else {
826         desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
827         desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
828     }
829 
830     caches = vring_get_region_caches(vq);
831     if (!caches) {
832         return;
833     }
834 
835     vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
836 }
837 
838 /* Called within rcu_read_lock().  */
839 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
840                     unsigned int len, unsigned int idx)
841 {
842     trace_virtqueue_fill(vq, elem, len, idx);
843 
844     virtqueue_unmap_sg(vq, elem, len);
845 
846     if (virtio_device_disabled(vq->vdev)) {
847         return;
848     }
849 
850     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
851         virtqueue_packed_fill(vq, elem, len, idx);
852     } else {
853         virtqueue_split_fill(vq, elem, len, idx);
854     }
855 }
856 
857 /* Called within rcu_read_lock().  */
858 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
859 {
860     uint16_t old, new;
861 
862     if (unlikely(!vq->vring.used)) {
863         return;
864     }
865 
866     /* Make sure buffer is written before we update index. */
867     smp_wmb();
868     trace_virtqueue_flush(vq, count);
869     old = vq->used_idx;
870     new = old + count;
871     vring_used_idx_set(vq, new);
872     vq->inuse -= count;
873     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
874         vq->signalled_used_valid = false;
875 }
876 
877 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
878 {
879     unsigned int i, ndescs = 0;
880 
881     if (unlikely(!vq->vring.desc)) {
882         return;
883     }
884 
885     for (i = 1; i < count; i++) {
886         virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
887         ndescs += vq->used_elems[i].ndescs;
888     }
889     virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
890     ndescs += vq->used_elems[0].ndescs;
891 
892     vq->inuse -= ndescs;
893     vq->used_idx += ndescs;
894     if (vq->used_idx >= vq->vring.num) {
895         vq->used_idx -= vq->vring.num;
896         vq->used_wrap_counter ^= 1;
897     }
898 }
899 
900 void virtqueue_flush(VirtQueue *vq, unsigned int count)
901 {
902     if (virtio_device_disabled(vq->vdev)) {
903         vq->inuse -= count;
904         return;
905     }
906 
907     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
908         virtqueue_packed_flush(vq, count);
909     } else {
910         virtqueue_split_flush(vq, count);
911     }
912 }
913 
914 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
915                     unsigned int len)
916 {
917     RCU_READ_LOCK_GUARD();
918     virtqueue_fill(vq, elem, len, 0);
919     virtqueue_flush(vq, 1);
920 }
921 
922 /* Called within rcu_read_lock().  */
923 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
924 {
925     uint16_t num_heads = vring_avail_idx(vq) - idx;
926 
927     /* Check it isn't doing very strange things with descriptor numbers. */
928     if (num_heads > vq->vring.num) {
929         virtio_error(vq->vdev, "Guest moved used index from %u to %u",
930                      idx, vq->shadow_avail_idx);
931         return -EINVAL;
932     }
933     /* On success, callers read a descriptor at vq->last_avail_idx.
934      * Make sure descriptor read does not bypass avail index read. */
935     if (num_heads) {
936         smp_rmb();
937     }
938 
939     return num_heads;
940 }
941 
942 /* Called within rcu_read_lock().  */
943 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
944                                unsigned int *head)
945 {
946     /* Grab the next descriptor number they're advertising, and increment
947      * the index we've seen. */
948     *head = vring_avail_ring(vq, idx % vq->vring.num);
949 
950     /* If their number is silly, that's a fatal mistake. */
951     if (*head >= vq->vring.num) {
952         virtio_error(vq->vdev, "Guest says index %u is available", *head);
953         return false;
954     }
955 
956     return true;
957 }
958 
/* Result codes of the "read next descriptor" helpers. */
enum {
    /* the chain is malformed */
    VIRTQUEUE_READ_DESC_ERROR = -1,
    /* end of chain */
    VIRTQUEUE_READ_DESC_DONE = 0,
    /* more buffers in chain */
    VIRTQUEUE_READ_DESC_MORE = 1,
};
964 
965 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
966                                           MemoryRegionCache *desc_cache,
967                                           unsigned int max, unsigned int *next)
968 {
969     /* If this descriptor says it doesn't chain, we're done. */
970     if (!(desc->flags & VRING_DESC_F_NEXT)) {
971         return VIRTQUEUE_READ_DESC_DONE;
972     }
973 
974     /* Check they're not leading us off end of descriptors. */
975     *next = desc->next;
976     /* Make sure compiler knows to grab that: we don't want it changing! */
977     smp_wmb();
978 
979     if (*next >= max) {
980         virtio_error(vdev, "Desc next is %u", *next);
981         return VIRTQUEUE_READ_DESC_ERROR;
982     }
983 
984     vring_split_desc_read(vdev, desc, desc_cache, *next);
985     return VIRTQUEUE_READ_DESC_MORE;
986 }
987 
988 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
989                             unsigned int *in_bytes, unsigned int *out_bytes,
990                             unsigned max_in_bytes, unsigned max_out_bytes)
991 {
992     VirtIODevice *vdev = vq->vdev;
993     unsigned int max, idx;
994     unsigned int total_bufs, in_total, out_total;
995     VRingMemoryRegionCaches *caches;
996     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
997     int64_t len = 0;
998     int rc;
999 
1000     RCU_READ_LOCK_GUARD();
1001 
1002     idx = vq->last_avail_idx;
1003     total_bufs = in_total = out_total = 0;
1004 
1005     max = vq->vring.num;
1006     caches = vring_get_region_caches(vq);
1007     if (!caches) {
1008         goto err;
1009     }
1010 
1011     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
1012         MemoryRegionCache *desc_cache = &caches->desc;
1013         unsigned int num_bufs;
1014         VRingDesc desc;
1015         unsigned int i;
1016 
1017         num_bufs = total_bufs;
1018 
1019         if (!virtqueue_get_head(vq, idx++, &i)) {
1020             goto err;
1021         }
1022 
1023         vring_split_desc_read(vdev, &desc, desc_cache, i);
1024 
1025         if (desc.flags & VRING_DESC_F_INDIRECT) {
1026             if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1027                 virtio_error(vdev, "Invalid size for indirect buffer table");
1028                 goto err;
1029             }
1030 
1031             /* If we've got too many, that implies a descriptor loop. */
1032             if (num_bufs >= max) {
1033                 virtio_error(vdev, "Looped descriptor");
1034                 goto err;
1035             }
1036 
1037             /* loop over the indirect descriptor table */
1038             len = address_space_cache_init(&indirect_desc_cache,
1039                                            vdev->dma_as,
1040                                            desc.addr, desc.len, false);
1041             desc_cache = &indirect_desc_cache;
1042             if (len < desc.len) {
1043                 virtio_error(vdev, "Cannot map indirect buffer");
1044                 goto err;
1045             }
1046 
1047             max = desc.len / sizeof(VRingDesc);
1048             num_bufs = i = 0;
1049             vring_split_desc_read(vdev, &desc, desc_cache, i);
1050         }
1051 
1052         do {
1053             /* If we've got too many, that implies a descriptor loop. */
1054             if (++num_bufs > max) {
1055                 virtio_error(vdev, "Looped descriptor");
1056                 goto err;
1057             }
1058 
1059             if (desc.flags & VRING_DESC_F_WRITE) {
1060                 in_total += desc.len;
1061             } else {
1062                 out_total += desc.len;
1063             }
1064             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1065                 goto done;
1066             }
1067 
1068             rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1069         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1070 
1071         if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1072             goto err;
1073         }
1074 
1075         if (desc_cache == &indirect_desc_cache) {
1076             address_space_cache_destroy(&indirect_desc_cache);
1077             total_bufs++;
1078         } else {
1079             total_bufs = num_bufs;
1080         }
1081     }
1082 
1083     if (rc < 0) {
1084         goto err;
1085     }
1086 
1087 done:
1088     address_space_cache_destroy(&indirect_desc_cache);
1089     if (in_bytes) {
1090         *in_bytes = in_total;
1091     }
1092     if (out_bytes) {
1093         *out_bytes = out_total;
1094     }
1095     return;
1096 
1097 err:
1098     in_total = out_total = 0;
1099     goto done;
1100 }
1101 
1102 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1103                                            VRingPackedDesc *desc,
1104                                            MemoryRegionCache
1105                                            *desc_cache,
1106                                            unsigned int max,
1107                                            unsigned int *next,
1108                                            bool indirect)
1109 {
1110     /* If this descriptor says it doesn't chain, we're done. */
1111     if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1112         return VIRTQUEUE_READ_DESC_DONE;
1113     }
1114 
1115     ++*next;
1116     if (*next == max) {
1117         if (indirect) {
1118             return VIRTQUEUE_READ_DESC_DONE;
1119         } else {
1120             (*next) -= vq->vring.num;
1121         }
1122     }
1123 
1124     vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1125     return VIRTQUEUE_READ_DESC_MORE;
1126 }
1127 
1128 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1129                                              unsigned int *in_bytes,
1130                                              unsigned int *out_bytes,
1131                                              unsigned max_in_bytes,
1132                                              unsigned max_out_bytes)
1133 {
1134     VirtIODevice *vdev = vq->vdev;
1135     unsigned int max, idx;
1136     unsigned int total_bufs, in_total, out_total;
1137     MemoryRegionCache *desc_cache;
1138     VRingMemoryRegionCaches *caches;
1139     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1140     int64_t len = 0;
1141     VRingPackedDesc desc;
1142     bool wrap_counter;
1143 
1144     RCU_READ_LOCK_GUARD();
1145     idx = vq->last_avail_idx;
1146     wrap_counter = vq->last_avail_wrap_counter;
1147     total_bufs = in_total = out_total = 0;
1148 
1149     max = vq->vring.num;
1150     caches = vring_get_region_caches(vq);
1151     if (!caches) {
1152         goto err;
1153     }
1154 
1155     for (;;) {
1156         unsigned int num_bufs = total_bufs;
1157         unsigned int i = idx;
1158         int rc;
1159 
1160         desc_cache = &caches->desc;
1161         vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1162         if (!is_desc_avail(desc.flags, wrap_counter)) {
1163             break;
1164         }
1165 
1166         if (desc.flags & VRING_DESC_F_INDIRECT) {
1167             if (desc.len % sizeof(VRingPackedDesc)) {
1168                 virtio_error(vdev, "Invalid size for indirect buffer table");
1169                 goto err;
1170             }
1171 
1172             /* If we've got too many, that implies a descriptor loop. */
1173             if (num_bufs >= max) {
1174                 virtio_error(vdev, "Looped descriptor");
1175                 goto err;
1176             }
1177 
1178             /* loop over the indirect descriptor table */
1179             len = address_space_cache_init(&indirect_desc_cache,
1180                                            vdev->dma_as,
1181                                            desc.addr, desc.len, false);
1182             desc_cache = &indirect_desc_cache;
1183             if (len < desc.len) {
1184                 virtio_error(vdev, "Cannot map indirect buffer");
1185                 goto err;
1186             }
1187 
1188             max = desc.len / sizeof(VRingPackedDesc);
1189             num_bufs = i = 0;
1190             vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1191         }
1192 
1193         do {
1194             /* If we've got too many, that implies a descriptor loop. */
1195             if (++num_bufs > max) {
1196                 virtio_error(vdev, "Looped descriptor");
1197                 goto err;
1198             }
1199 
1200             if (desc.flags & VRING_DESC_F_WRITE) {
1201                 in_total += desc.len;
1202             } else {
1203                 out_total += desc.len;
1204             }
1205             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1206                 goto done;
1207             }
1208 
1209             rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1210                                                  &i, desc_cache ==
1211                                                  &indirect_desc_cache);
1212         } while (rc == VIRTQUEUE_READ_DESC_MORE);
1213 
1214         if (desc_cache == &indirect_desc_cache) {
1215             address_space_cache_destroy(&indirect_desc_cache);
1216             total_bufs++;
1217             idx++;
1218         } else {
1219             idx += num_bufs - total_bufs;
1220             total_bufs = num_bufs;
1221         }
1222 
1223         if (idx >= vq->vring.num) {
1224             idx -= vq->vring.num;
1225             wrap_counter ^= 1;
1226         }
1227     }
1228 
1229     /* Record the index and wrap counter for a kick we want */
1230     vq->shadow_avail_idx = idx;
1231     vq->shadow_avail_wrap_counter = wrap_counter;
1232 done:
1233     address_space_cache_destroy(&indirect_desc_cache);
1234     if (in_bytes) {
1235         *in_bytes = in_total;
1236     }
1237     if (out_bytes) {
1238         *out_bytes = out_total;
1239     }
1240     return;
1241 
1242 err:
1243     in_total = out_total = 0;
1244     goto done;
1245 }
1246 
1247 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1248                                unsigned int *out_bytes,
1249                                unsigned max_in_bytes, unsigned max_out_bytes)
1250 {
1251     uint16_t desc_size;
1252     VRingMemoryRegionCaches *caches;
1253 
1254     if (unlikely(!vq->vring.desc)) {
1255         goto err;
1256     }
1257 
1258     caches = vring_get_region_caches(vq);
1259     if (!caches) {
1260         goto err;
1261     }
1262 
1263     desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1264                                 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1265     if (caches->desc.len < vq->vring.num * desc_size) {
1266         virtio_error(vq->vdev, "Cannot map descriptor ring");
1267         goto err;
1268     }
1269 
1270     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1271         virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1272                                          max_in_bytes, max_out_bytes);
1273     } else {
1274         virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1275                                         max_in_bytes, max_out_bytes);
1276     }
1277 
1278     return;
1279 err:
1280     if (in_bytes) {
1281         *in_bytes = 0;
1282     }
1283     if (out_bytes) {
1284         *out_bytes = 0;
1285     }
1286 }
1287 
1288 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1289                           unsigned int out_bytes)
1290 {
1291     unsigned int in_total, out_total;
1292 
1293     virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1294     return in_bytes <= in_total && out_bytes <= out_total;
1295 }
1296 
1297 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1298                                hwaddr *addr, struct iovec *iov,
1299                                unsigned int max_num_sg, bool is_write,
1300                                hwaddr pa, size_t sz)
1301 {
1302     bool ok = false;
1303     unsigned num_sg = *p_num_sg;
1304     assert(num_sg <= max_num_sg);
1305 
1306     if (!sz) {
1307         virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1308         goto out;
1309     }
1310 
1311     while (sz) {
1312         hwaddr len = sz;
1313 
1314         if (num_sg == max_num_sg) {
1315             virtio_error(vdev, "virtio: too many write descriptors in "
1316                                "indirect table");
1317             goto out;
1318         }
1319 
1320         iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1321                                               is_write ?
1322                                               DMA_DIRECTION_FROM_DEVICE :
1323                                               DMA_DIRECTION_TO_DEVICE);
1324         if (!iov[num_sg].iov_base) {
1325             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1326             goto out;
1327         }
1328 
1329         iov[num_sg].iov_len = len;
1330         addr[num_sg] = pa;
1331 
1332         sz -= len;
1333         pa += len;
1334         num_sg++;
1335     }
1336     ok = true;
1337 
1338 out:
1339     *p_num_sg = num_sg;
1340     return ok;
1341 }
1342 
1343 /* Only used by error code paths before we have a VirtQueueElement (therefore
1344  * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
1345  * yet.
1346  */
1347 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1348                                     struct iovec *iov)
1349 {
1350     unsigned int i;
1351 
1352     for (i = 0; i < out_num + in_num; i++) {
1353         int is_write = i >= out_num;
1354 
1355         cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1356         iov++;
1357     }
1358 }
1359 
1360 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1361                                 hwaddr *addr, unsigned int num_sg,
1362                                 bool is_write)
1363 {
1364     unsigned int i;
1365     hwaddr len;
1366 
1367     for (i = 0; i < num_sg; i++) {
1368         len = sg[i].iov_len;
1369         sg[i].iov_base = dma_memory_map(vdev->dma_as,
1370                                         addr[i], &len, is_write ?
1371                                         DMA_DIRECTION_FROM_DEVICE :
1372                                         DMA_DIRECTION_TO_DEVICE);
1373         if (!sg[i].iov_base) {
1374             error_report("virtio: error trying to map MMIO memory");
1375             exit(1);
1376         }
1377         if (len != sg[i].iov_len) {
1378             error_report("virtio: unexpected memory split");
1379             exit(1);
1380         }
1381     }
1382 }
1383 
1384 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1385 {
1386     virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1387     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1388                                                                         false);
1389 }
1390 
1391 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1392 {
1393     VirtQueueElement *elem;
1394     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1395     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1396     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1397     size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1398     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1399     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1400 
1401     assert(sz >= sizeof(VirtQueueElement));
1402     elem = g_malloc(out_sg_end);
1403     trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1404     elem->out_num = out_num;
1405     elem->in_num = in_num;
1406     elem->in_addr = (void *)elem + in_addr_ofs;
1407     elem->out_addr = (void *)elem + out_addr_ofs;
1408     elem->in_sg = (void *)elem + in_sg_ofs;
1409     elem->out_sg = (void *)elem + out_sg_ofs;
1410     return elem;
1411 }
1412 
/*
 * Take the next available descriptor chain off a split ring and return it as
 * a freshly allocated element of @sz bytes with all buffers mapped.
 *
 * Returns NULL if the queue is empty or on any error (virtio_error() is
 * raised for the latter).  Mappings made before an error is detected are
 * undone again.  On success vq->inuse is incremented; vq->last_avail_idx is
 * advanced as soon as the head has been fetched.
 */
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    /* Scratch arrays: out entries first, in entries right behind them. */
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        /* From here on the chain is bounded by the indirect table size. */
        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* in entries are stored behind the out entries collected so far */
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* An out descriptor after an in descriptor is rejected. */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    /* Shared exit path; elem is still NULL when an error brought us here. */
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
1549 
1550 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1551 {
1552     unsigned int i, max;
1553     VRingMemoryRegionCaches *caches;
1554     MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1555     MemoryRegionCache *desc_cache;
1556     int64_t len;
1557     VirtIODevice *vdev = vq->vdev;
1558     VirtQueueElement *elem = NULL;
1559     unsigned out_num, in_num, elem_entries;
1560     hwaddr addr[VIRTQUEUE_MAX_SIZE];
1561     struct iovec iov[VIRTQUEUE_MAX_SIZE];
1562     VRingPackedDesc desc;
1563     uint16_t id;
1564     int rc;
1565 
1566     RCU_READ_LOCK_GUARD();
1567     if (virtio_queue_packed_empty_rcu(vq)) {
1568         goto done;
1569     }
1570 
1571     /* When we start there are none of either input nor output. */
1572     out_num = in_num = elem_entries = 0;
1573 
1574     max = vq->vring.num;
1575 
1576     if (vq->inuse >= vq->vring.num) {
1577         virtio_error(vdev, "Virtqueue size exceeded");
1578         goto done;
1579     }
1580 
1581     i = vq->last_avail_idx;
1582 
1583     caches = vring_get_region_caches(vq);
1584     if (!caches) {
1585         virtio_error(vdev, "Region caches not initialized");
1586         goto done;
1587     }
1588 
1589     if (caches->desc.len < max * sizeof(VRingDesc)) {
1590         virtio_error(vdev, "Cannot map descriptor ring");
1591         goto done;
1592     }
1593 
1594     desc_cache = &caches->desc;
1595     vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1596     id = desc.id;
1597     if (desc.flags & VRING_DESC_F_INDIRECT) {
1598         if (desc.len % sizeof(VRingPackedDesc)) {
1599             virtio_error(vdev, "Invalid size for indirect buffer table");
1600             goto done;
1601         }
1602 
1603         /* loop over the indirect descriptor table */
1604         len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1605                                        desc.addr, desc.len, false);
1606         desc_cache = &indirect_desc_cache;
1607         if (len < desc.len) {
1608             virtio_error(vdev, "Cannot map indirect buffer");
1609             goto done;
1610         }
1611 
1612         max = desc.len / sizeof(VRingPackedDesc);
1613         i = 0;
1614         vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1615     }
1616 
1617     /* Collect all the descriptors */
1618     do {
1619         bool map_ok;
1620 
1621         if (desc.flags & VRING_DESC_F_WRITE) {
1622             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1623                                         iov + out_num,
1624                                         VIRTQUEUE_MAX_SIZE - out_num, true,
1625                                         desc.addr, desc.len);
1626         } else {
1627             if (in_num) {
1628                 virtio_error(vdev, "Incorrect order for descriptors");
1629                 goto err_undo_map;
1630             }
1631             map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1632                                         VIRTQUEUE_MAX_SIZE, false,
1633                                         desc.addr, desc.len);
1634         }
1635         if (!map_ok) {
1636             goto err_undo_map;
1637         }
1638 
1639         /* If we've got too many, that implies a descriptor loop. */
1640         if (++elem_entries > max) {
1641             virtio_error(vdev, "Looped descriptor");
1642             goto err_undo_map;
1643         }
1644 
1645         rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1646                                              desc_cache ==
1647                                              &indirect_desc_cache);
1648     } while (rc == VIRTQUEUE_READ_DESC_MORE);
1649 
1650     /* Now copy what we have collected and mapped */
1651     elem = virtqueue_alloc_element(sz, out_num, in_num);
1652     for (i = 0; i < out_num; i++) {
1653         elem->out_addr[i] = addr[i];
1654         elem->out_sg[i] = iov[i];
1655     }
1656     for (i = 0; i < in_num; i++) {
1657         elem->in_addr[i] = addr[out_num + i];
1658         elem->in_sg[i] = iov[out_num + i];
1659     }
1660 
1661     elem->index = id;
1662     elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1663     vq->last_avail_idx += elem->ndescs;
1664     vq->inuse += elem->ndescs;
1665 
1666     if (vq->last_avail_idx >= vq->vring.num) {
1667         vq->last_avail_idx -= vq->vring.num;
1668         vq->last_avail_wrap_counter ^= 1;
1669     }
1670 
1671     vq->shadow_avail_idx = vq->last_avail_idx;
1672     vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1673 
1674     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1675 done:
1676     address_space_cache_destroy(&indirect_desc_cache);
1677 
1678     return elem;
1679 
1680 err_undo_map:
1681     virtqueue_undo_map_desc(out_num, in_num, iov);
1682     goto done;
1683 }
1684 
1685 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1686 {
1687     if (virtio_device_disabled(vq->vdev)) {
1688         return NULL;
1689     }
1690 
1691     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1692         return virtqueue_packed_pop(vq, sz);
1693     } else {
1694         return virtqueue_split_pop(vq, sz);
1695     }
1696 }
1697 
1698 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1699 {
1700     VRingMemoryRegionCaches *caches;
1701     MemoryRegionCache *desc_cache;
1702     unsigned int dropped = 0;
1703     VirtQueueElement elem = {};
1704     VirtIODevice *vdev = vq->vdev;
1705     VRingPackedDesc desc;
1706 
1707     caches = vring_get_region_caches(vq);
1708     if (!caches) {
1709         return 0;
1710     }
1711 
1712     desc_cache = &caches->desc;
1713 
1714     virtio_queue_set_notification(vq, 0);
1715 
1716     while (vq->inuse < vq->vring.num) {
1717         unsigned int idx = vq->last_avail_idx;
1718         /*
1719          * works similar to virtqueue_pop but does not map buffers
1720          * and does not allocate any memory.
1721          */
1722         vring_packed_desc_read(vdev, &desc, desc_cache,
1723                                vq->last_avail_idx , true);
1724         if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1725             break;
1726         }
1727         elem.index = desc.id;
1728         elem.ndescs = 1;
1729         while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1730                                                vq->vring.num, &idx, false)) {
1731             ++elem.ndescs;
1732         }
1733         /*
1734          * immediately push the element, nothing to unmap
1735          * as both in_num and out_num are set to 0.
1736          */
1737         virtqueue_push(vq, &elem, 0);
1738         dropped++;
1739         vq->last_avail_idx += elem.ndescs;
1740         if (vq->last_avail_idx >= vq->vring.num) {
1741             vq->last_avail_idx -= vq->vring.num;
1742             vq->last_avail_wrap_counter ^= 1;
1743         }
1744     }
1745 
1746     return dropped;
1747 }
1748 
1749 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1750 {
1751     unsigned int dropped = 0;
1752     VirtQueueElement elem = {};
1753     VirtIODevice *vdev = vq->vdev;
1754     bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1755 
1756     while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1757         /* works similar to virtqueue_pop but does not map buffers
1758         * and does not allocate any memory */
1759         smp_rmb();
1760         if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1761             break;
1762         }
1763         vq->inuse++;
1764         vq->last_avail_idx++;
1765         if (fEventIdx) {
1766             vring_set_avail_event(vq, vq->last_avail_idx);
1767         }
1768         /* immediately push the element, nothing to unmap
1769          * as both in_num and out_num are set to 0 */
1770         virtqueue_push(vq, &elem, 0);
1771         dropped++;
1772     }
1773 
1774     return dropped;
1775 }
1776 
1777 /* virtqueue_drop_all:
1778  * @vq: The #VirtQueue
1779  * Drops all queued buffers and indicates them to the guest
1780  * as if they are done. Useful when buffers can not be
1781  * processed but must be returned to the guest.
1782  */
1783 unsigned int virtqueue_drop_all(VirtQueue *vq)
1784 {
1785     struct VirtIODevice *vdev = vq->vdev;
1786 
1787     if (virtio_device_disabled(vq->vdev)) {
1788         return 0;
1789     }
1790 
1791     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1792         return virtqueue_packed_drop_all(vq);
1793     } else {
1794         return virtqueue_split_drop_all(vq);
1795     }
1796 }
1797 
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * This is that old layout: fixed-size arrays of VIRTQUEUE_MAX_SIZE entries.
 * qemu_put_virtqueue_element() fills it in and writes it out as one raw
 * buffer; qemu_get_virtqueue_element() reads it back the same way.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;   /* number of valid out_addr / out_sg entries */
    unsigned int in_num;    /* number of valid in_addr / in_sg entries */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    /* Only iov_len of the iovecs is meaningful in the stream. */
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
1814 
1815 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1816 {
1817     VirtQueueElement *elem;
1818     VirtQueueElementOld data;
1819     int i;
1820 
1821     qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1822 
1823     /* TODO: teach all callers that this can fail, and return failure instead
1824      * of asserting here.
1825      * This is just one thing (there are probably more) that must be
1826      * fixed before we can allow NDEBUG compilation.
1827      */
1828     assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1829     assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1830 
1831     elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1832     elem->index = data.index;
1833 
1834     for (i = 0; i < elem->in_num; i++) {
1835         elem->in_addr[i] = data.in_addr[i];
1836     }
1837 
1838     for (i = 0; i < elem->out_num; i++) {
1839         elem->out_addr[i] = data.out_addr[i];
1840     }
1841 
1842     for (i = 0; i < elem->in_num; i++) {
1843         /* Base is overwritten by virtqueue_map.  */
1844         elem->in_sg[i].iov_base = 0;
1845         elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1846     }
1847 
1848     for (i = 0; i < elem->out_num; i++) {
1849         /* Base is overwritten by virtqueue_map.  */
1850         elem->out_sg[i].iov_base = 0;
1851         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1852     }
1853 
1854     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1855         qemu_get_be32s(f, &elem->ndescs);
1856     }
1857 
1858     virtqueue_map(vdev, elem);
1859     return elem;
1860 }
1861 
1862 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1863                                 VirtQueueElement *elem)
1864 {
1865     VirtQueueElementOld data;
1866     int i;
1867 
1868     memset(&data, 0, sizeof(data));
1869     data.index = elem->index;
1870     data.in_num = elem->in_num;
1871     data.out_num = elem->out_num;
1872 
1873     for (i = 0; i < elem->in_num; i++) {
1874         data.in_addr[i] = elem->in_addr[i];
1875     }
1876 
1877     for (i = 0; i < elem->out_num; i++) {
1878         data.out_addr[i] = elem->out_addr[i];
1879     }
1880 
1881     for (i = 0; i < elem->in_num; i++) {
1882         /* Base is overwritten by virtqueue_map when loading.  Do not
1883          * save it, as it would leak the QEMU address space layout.  */
1884         data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1885     }
1886 
1887     for (i = 0; i < elem->out_num; i++) {
1888         /* Do not save iov_base as above.  */
1889         data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1890     }
1891 
1892     if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1893         qemu_put_be32s(f, &elem->ndescs);
1894     }
1895 
1896     qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1897 }
1898 
1899 /* virtio device */
1900 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1901 {
1902     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1903     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1904 
1905     if (virtio_device_disabled(vdev)) {
1906         return;
1907     }
1908 
1909     if (k->notify) {
1910         k->notify(qbus->parent, vector);
1911     }
1912 }
1913 
1914 void virtio_update_irq(VirtIODevice *vdev)
1915 {
1916     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1917 }
1918 
1919 static int virtio_validate_features(VirtIODevice *vdev)
1920 {
1921     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1922 
1923     if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1924         !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1925         return -EFAULT;
1926     }
1927 
1928     if (k->validate_features) {
1929         return k->validate_features(vdev);
1930     } else {
1931         return 0;
1932     }
1933 }
1934 
1935 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1936 {
1937     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1938     trace_virtio_set_status(vdev, val);
1939 
1940     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1941         if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1942             val & VIRTIO_CONFIG_S_FEATURES_OK) {
1943             int ret = virtio_validate_features(vdev);
1944 
1945             if (ret) {
1946                 return ret;
1947             }
1948         }
1949     }
1950 
1951     if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1952         (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1953         virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1954     }
1955 
1956     if (k->set_status) {
1957         k->set_status(vdev, val);
1958     }
1959     vdev->status = val;
1960 
1961     return 0;
1962 }
1963 
1964 static enum virtio_device_endian virtio_default_endian(void)
1965 {
1966     if (target_words_bigendian()) {
1967         return VIRTIO_DEVICE_ENDIAN_BIG;
1968     } else {
1969         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1970     }
1971 }
1972 
1973 static enum virtio_device_endian virtio_current_cpu_endian(void)
1974 {
1975     if (cpu_virtio_is_big_endian(current_cpu)) {
1976         return VIRTIO_DEVICE_ENDIAN_BIG;
1977     } else {
1978         return VIRTIO_DEVICE_ENDIAN_LITTLE;
1979     }
1980 }
1981 
1982 void virtio_reset(void *opaque)
1983 {
1984     VirtIODevice *vdev = opaque;
1985     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1986     int i;
1987 
1988     virtio_set_status(vdev, 0);
1989     if (current_cpu) {
1990         /* Guest initiated reset */
1991         vdev->device_endian = virtio_current_cpu_endian();
1992     } else {
1993         /* System reset */
1994         vdev->device_endian = virtio_default_endian();
1995     }
1996 
1997     if (k->reset) {
1998         k->reset(vdev);
1999     }
2000 
2001     vdev->start_on_kick = false;
2002     vdev->started = false;
2003     vdev->broken = false;
2004     vdev->guest_features = 0;
2005     vdev->queue_sel = 0;
2006     vdev->status = 0;
2007     vdev->disabled = false;
2008     qatomic_set(&vdev->isr, 0);
2009     vdev->config_vector = VIRTIO_NO_VECTOR;
2010     virtio_notify_vector(vdev, vdev->config_vector);
2011 
2012     for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2013         vdev->vq[i].vring.desc = 0;
2014         vdev->vq[i].vring.avail = 0;
2015         vdev->vq[i].vring.used = 0;
2016         vdev->vq[i].last_avail_idx = 0;
2017         vdev->vq[i].shadow_avail_idx = 0;
2018         vdev->vq[i].used_idx = 0;
2019         vdev->vq[i].last_avail_wrap_counter = true;
2020         vdev->vq[i].shadow_avail_wrap_counter = true;
2021         vdev->vq[i].used_wrap_counter = true;
2022         virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2023         vdev->vq[i].signalled_used = 0;
2024         vdev->vq[i].signalled_used_valid = false;
2025         vdev->vq[i].notification = true;
2026         vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2027         vdev->vq[i].inuse = 0;
2028         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2029     }
2030 }
2031 
2032 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2033 {
2034     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2035     uint8_t val;
2036 
2037     if (addr + sizeof(val) > vdev->config_len) {
2038         return (uint32_t)-1;
2039     }
2040 
2041     k->get_config(vdev, vdev->config);
2042 
2043     val = ldub_p(vdev->config + addr);
2044     return val;
2045 }
2046 
2047 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2048 {
2049     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2050     uint16_t val;
2051 
2052     if (addr + sizeof(val) > vdev->config_len) {
2053         return (uint32_t)-1;
2054     }
2055 
2056     k->get_config(vdev, vdev->config);
2057 
2058     val = lduw_p(vdev->config + addr);
2059     return val;
2060 }
2061 
2062 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2063 {
2064     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2065     uint32_t val;
2066 
2067     if (addr + sizeof(val) > vdev->config_len) {
2068         return (uint32_t)-1;
2069     }
2070 
2071     k->get_config(vdev, vdev->config);
2072 
2073     val = ldl_p(vdev->config + addr);
2074     return val;
2075 }
2076 
2077 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2078 {
2079     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2080     uint8_t val = data;
2081 
2082     if (addr + sizeof(val) > vdev->config_len) {
2083         return;
2084     }
2085 
2086     stb_p(vdev->config + addr, val);
2087 
2088     if (k->set_config) {
2089         k->set_config(vdev, vdev->config);
2090     }
2091 }
2092 
2093 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2094 {
2095     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2096     uint16_t val = data;
2097 
2098     if (addr + sizeof(val) > vdev->config_len) {
2099         return;
2100     }
2101 
2102     stw_p(vdev->config + addr, val);
2103 
2104     if (k->set_config) {
2105         k->set_config(vdev, vdev->config);
2106     }
2107 }
2108 
2109 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2110 {
2111     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2112     uint32_t val = data;
2113 
2114     if (addr + sizeof(val) > vdev->config_len) {
2115         return;
2116     }
2117 
2118     stl_p(vdev->config + addr, val);
2119 
2120     if (k->set_config) {
2121         k->set_config(vdev, vdev->config);
2122     }
2123 }
2124 
2125 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2126 {
2127     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2128     uint8_t val;
2129 
2130     if (addr + sizeof(val) > vdev->config_len) {
2131         return (uint32_t)-1;
2132     }
2133 
2134     k->get_config(vdev, vdev->config);
2135 
2136     val = ldub_p(vdev->config + addr);
2137     return val;
2138 }
2139 
2140 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2141 {
2142     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2143     uint16_t val;
2144 
2145     if (addr + sizeof(val) > vdev->config_len) {
2146         return (uint32_t)-1;
2147     }
2148 
2149     k->get_config(vdev, vdev->config);
2150 
2151     val = lduw_le_p(vdev->config + addr);
2152     return val;
2153 }
2154 
2155 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2156 {
2157     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2158     uint32_t val;
2159 
2160     if (addr + sizeof(val) > vdev->config_len) {
2161         return (uint32_t)-1;
2162     }
2163 
2164     k->get_config(vdev, vdev->config);
2165 
2166     val = ldl_le_p(vdev->config + addr);
2167     return val;
2168 }
2169 
2170 void virtio_config_modern_writeb(VirtIODevice *vdev,
2171                                  uint32_t addr, uint32_t data)
2172 {
2173     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2174     uint8_t val = data;
2175 
2176     if (addr + sizeof(val) > vdev->config_len) {
2177         return;
2178     }
2179 
2180     stb_p(vdev->config + addr, val);
2181 
2182     if (k->set_config) {
2183         k->set_config(vdev, vdev->config);
2184     }
2185 }
2186 
2187 void virtio_config_modern_writew(VirtIODevice *vdev,
2188                                  uint32_t addr, uint32_t data)
2189 {
2190     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2191     uint16_t val = data;
2192 
2193     if (addr + sizeof(val) > vdev->config_len) {
2194         return;
2195     }
2196 
2197     stw_le_p(vdev->config + addr, val);
2198 
2199     if (k->set_config) {
2200         k->set_config(vdev, vdev->config);
2201     }
2202 }
2203 
2204 void virtio_config_modern_writel(VirtIODevice *vdev,
2205                                  uint32_t addr, uint32_t data)
2206 {
2207     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2208     uint32_t val = data;
2209 
2210     if (addr + sizeof(val) > vdev->config_len) {
2211         return;
2212     }
2213 
2214     stl_le_p(vdev->config + addr, val);
2215 
2216     if (k->set_config) {
2217         k->set_config(vdev, vdev->config);
2218     }
2219 }
2220 
2221 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2222 {
2223     if (!vdev->vq[n].vring.num) {
2224         return;
2225     }
2226     vdev->vq[n].vring.desc = addr;
2227     virtio_queue_update_rings(vdev, n);
2228 }
2229 
2230 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2231 {
2232     return vdev->vq[n].vring.desc;
2233 }
2234 
2235 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2236                             hwaddr avail, hwaddr used)
2237 {
2238     if (!vdev->vq[n].vring.num) {
2239         return;
2240     }
2241     vdev->vq[n].vring.desc = desc;
2242     vdev->vq[n].vring.avail = avail;
2243     vdev->vq[n].vring.used = used;
2244     virtio_init_region_cache(vdev, n);
2245 }
2246 
2247 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2248 {
2249     /* Don't allow guest to flip queue between existent and
2250      * nonexistent states, or to set it to an invalid size.
2251      */
2252     if (!!num != !!vdev->vq[n].vring.num ||
2253         num > VIRTQUEUE_MAX_SIZE ||
2254         num < 0) {
2255         return;
2256     }
2257     vdev->vq[n].vring.num = num;
2258 }
2259 
2260 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2261 {
2262     return QLIST_FIRST(&vdev->vector_queues[vector]);
2263 }
2264 
2265 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2266 {
2267     return QLIST_NEXT(vq, node);
2268 }
2269 
2270 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2271 {
2272     return vdev->vq[n].vring.num;
2273 }
2274 
2275 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2276 {
2277     return vdev->vq[n].vring.num_default;
2278 }
2279 
2280 int virtio_get_num_queues(VirtIODevice *vdev)
2281 {
2282     int i;
2283 
2284     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2285         if (!virtio_queue_get_num(vdev, i)) {
2286             break;
2287         }
2288     }
2289 
2290     return i;
2291 }
2292 
2293 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2294 {
2295     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2296     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2297 
2298     /* virtio-1 compliant devices cannot change the alignment */
2299     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2300         error_report("tried to modify queue alignment for virtio-1 device");
2301         return;
2302     }
2303     /* Check that the transport told us it was going to do this
2304      * (so a buggy transport will immediately assert rather than
2305      * silently failing to migrate this state)
2306      */
2307     assert(k->has_variable_vring_alignment);
2308 
2309     if (align) {
2310         vdev->vq[n].vring.align = align;
2311         virtio_queue_update_rings(vdev, n);
2312     }
2313 }
2314 
2315 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2316 {
2317     bool ret = false;
2318 
2319     if (vq->vring.desc && vq->handle_aio_output) {
2320         VirtIODevice *vdev = vq->vdev;
2321 
2322         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2323         ret = vq->handle_aio_output(vdev, vq);
2324 
2325         if (unlikely(vdev->start_on_kick)) {
2326             virtio_set_started(vdev, true);
2327         }
2328     }
2329 
2330     return ret;
2331 }
2332 
2333 static void virtio_queue_notify_vq(VirtQueue *vq)
2334 {
2335     if (vq->vring.desc && vq->handle_output) {
2336         VirtIODevice *vdev = vq->vdev;
2337 
2338         if (unlikely(vdev->broken)) {
2339             return;
2340         }
2341 
2342         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2343         vq->handle_output(vdev, vq);
2344 
2345         if (unlikely(vdev->start_on_kick)) {
2346             virtio_set_started(vdev, true);
2347         }
2348     }
2349 }
2350 
2351 void virtio_queue_notify(VirtIODevice *vdev, int n)
2352 {
2353     VirtQueue *vq = &vdev->vq[n];
2354 
2355     if (unlikely(!vq->vring.desc || vdev->broken)) {
2356         return;
2357     }
2358 
2359     trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2360     if (vq->host_notifier_enabled) {
2361         event_notifier_set(&vq->host_notifier);
2362     } else if (vq->handle_output) {
2363         vq->handle_output(vdev, vq);
2364 
2365         if (unlikely(vdev->start_on_kick)) {
2366             virtio_set_started(vdev, true);
2367         }
2368     }
2369 }
2370 
2371 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2372 {
2373     return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2374         VIRTIO_NO_VECTOR;
2375 }
2376 
2377 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2378 {
2379     VirtQueue *vq = &vdev->vq[n];
2380 
2381     if (n < VIRTIO_QUEUE_MAX) {
2382         if (vdev->vector_queues &&
2383             vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2384             QLIST_REMOVE(vq, node);
2385         }
2386         vdev->vq[n].vector = vector;
2387         if (vdev->vector_queues &&
2388             vector != VIRTIO_NO_VECTOR) {
2389             QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2390         }
2391     }
2392 }
2393 
2394 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2395                             VirtIOHandleOutput handle_output)
2396 {
2397     int i;
2398 
2399     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2400         if (vdev->vq[i].vring.num == 0)
2401             break;
2402     }
2403 
2404     if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2405         abort();
2406 
2407     vdev->vq[i].vring.num = queue_size;
2408     vdev->vq[i].vring.num_default = queue_size;
2409     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2410     vdev->vq[i].handle_output = handle_output;
2411     vdev->vq[i].handle_aio_output = NULL;
2412     vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2413                                        queue_size);
2414 
2415     return &vdev->vq[i];
2416 }
2417 
2418 void virtio_delete_queue(VirtQueue *vq)
2419 {
2420     vq->vring.num = 0;
2421     vq->vring.num_default = 0;
2422     vq->handle_output = NULL;
2423     vq->handle_aio_output = NULL;
2424     g_free(vq->used_elems);
2425     vq->used_elems = NULL;
2426     virtio_virtqueue_reset_region_cache(vq);
2427 }
2428 
2429 void virtio_del_queue(VirtIODevice *vdev, int n)
2430 {
2431     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2432         abort();
2433     }
2434 
2435     virtio_delete_queue(&vdev->vq[n]);
2436 }
2437 
2438 static void virtio_set_isr(VirtIODevice *vdev, int value)
2439 {
2440     uint8_t old = qatomic_read(&vdev->isr);
2441 
2442     /* Do not write ISR if it does not change, so that its cacheline remains
2443      * shared in the common case where the guest does not read it.
2444      */
2445     if ((old & value) != value) {
2446         qatomic_or(&vdev->isr, value);
2447     }
2448 }
2449 
2450 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2451 {
2452     uint16_t old, new;
2453     bool v;
2454     /* We need to expose used array entries before checking used event. */
2455     smp_mb();
2456     /* Always notify when queue is empty (when feature acknowledge) */
2457     if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2458         !vq->inuse && virtio_queue_empty(vq)) {
2459         return true;
2460     }
2461 
2462     if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2463         return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2464     }
2465 
2466     v = vq->signalled_used_valid;
2467     vq->signalled_used_valid = true;
2468     old = vq->signalled_used;
2469     new = vq->signalled_used = vq->used_idx;
2470     return !v || vring_need_event(vring_get_used_event(vq), new, old);
2471 }
2472 
2473 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2474                                     uint16_t off_wrap, uint16_t new,
2475                                     uint16_t old)
2476 {
2477     int off = off_wrap & ~(1 << 15);
2478 
2479     if (wrap != off_wrap >> 15) {
2480         off -= vq->vring.num;
2481     }
2482 
2483     return vring_need_event(off, new, old);
2484 }
2485 
2486 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2487 {
2488     VRingPackedDescEvent e;
2489     uint16_t old, new;
2490     bool v;
2491     VRingMemoryRegionCaches *caches;
2492 
2493     caches = vring_get_region_caches(vq);
2494     if (!caches) {
2495         return false;
2496     }
2497 
2498     vring_packed_event_read(vdev, &caches->avail, &e);
2499 
2500     old = vq->signalled_used;
2501     new = vq->signalled_used = vq->used_idx;
2502     v = vq->signalled_used_valid;
2503     vq->signalled_used_valid = true;
2504 
2505     if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2506         return false;
2507     } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2508         return true;
2509     }
2510 
2511     return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2512                                          e.off_wrap, new, old);
2513 }
2514 
2515 /* Called within rcu_read_lock().  */
2516 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2517 {
2518     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2519         return virtio_packed_should_notify(vdev, vq);
2520     } else {
2521         return virtio_split_should_notify(vdev, vq);
2522     }
2523 }
2524 
2525 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2526 {
2527     WITH_RCU_READ_LOCK_GUARD() {
2528         if (!virtio_should_notify(vdev, vq)) {
2529             return;
2530         }
2531     }
2532 
2533     trace_virtio_notify_irqfd(vdev, vq);
2534 
2535     /*
2536      * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2537      * windows drivers included in virtio-win 1.8.0 (circa 2015) are
2538      * incorrectly polling this bit during crashdump and hibernation
2539      * in MSI mode, causing a hang if this bit is never updated.
2540      * Recent releases of Windows do not really shut down, but rather
2541      * log out and hibernate to make the next startup faster.  Hence,
2542      * this manifested as a more serious hang during shutdown with
2543      *
2544      * Next driver release from 2016 fixed this problem, so working around it
2545      * is not a must, but it's easy to do so let's do it here.
2546      *
2547      * Note: it's safe to update ISR from any thread as it was switched
2548      * to an atomic operation.
2549      */
2550     virtio_set_isr(vq->vdev, 0x1);
2551     event_notifier_set(&vq->guest_notifier);
2552 }
2553 
2554 static void virtio_irq(VirtQueue *vq)
2555 {
2556     virtio_set_isr(vq->vdev, 0x1);
2557     virtio_notify_vector(vq->vdev, vq->vector);
2558 }
2559 
2560 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2561 {
2562     WITH_RCU_READ_LOCK_GUARD() {
2563         if (!virtio_should_notify(vdev, vq)) {
2564             return;
2565         }
2566     }
2567 
2568     trace_virtio_notify(vdev, vq);
2569     virtio_irq(vq);
2570 }
2571 
2572 void virtio_notify_config(VirtIODevice *vdev)
2573 {
2574     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2575         return;
2576 
2577     virtio_set_isr(vdev, 0x3);
2578     vdev->generation++;
2579     virtio_notify_vector(vdev, vdev->config_vector);
2580 }
2581 
2582 static bool virtio_device_endian_needed(void *opaque)
2583 {
2584     VirtIODevice *vdev = opaque;
2585 
2586     assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2587     if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2588         return vdev->device_endian != virtio_default_endian();
2589     }
2590     /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2591     return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2592 }
2593 
2594 static bool virtio_64bit_features_needed(void *opaque)
2595 {
2596     VirtIODevice *vdev = opaque;
2597 
2598     return (vdev->host_features >> 32) != 0;
2599 }
2600 
2601 static bool virtio_virtqueue_needed(void *opaque)
2602 {
2603     VirtIODevice *vdev = opaque;
2604 
2605     return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2606 }
2607 
2608 static bool virtio_packed_virtqueue_needed(void *opaque)
2609 {
2610     VirtIODevice *vdev = opaque;
2611 
2612     return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2613 }
2614 
2615 static bool virtio_ringsize_needed(void *opaque)
2616 {
2617     VirtIODevice *vdev = opaque;
2618     int i;
2619 
2620     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2621         if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2622             return true;
2623         }
2624     }
2625     return false;
2626 }
2627 
2628 static bool virtio_extra_state_needed(void *opaque)
2629 {
2630     VirtIODevice *vdev = opaque;
2631     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2632     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2633 
2634     return k->has_extra_state &&
2635         k->has_extra_state(qbus->parent);
2636 }
2637 
2638 static bool virtio_broken_needed(void *opaque)
2639 {
2640     VirtIODevice *vdev = opaque;
2641 
2642     return vdev->broken;
2643 }
2644 
2645 static bool virtio_started_needed(void *opaque)
2646 {
2647     VirtIODevice *vdev = opaque;
2648 
2649     return vdev->started;
2650 }
2651 
2652 static bool virtio_disabled_needed(void *opaque)
2653 {
2654     VirtIODevice *vdev = opaque;
2655 
2656     return vdev->disabled;
2657 }
2658 
2659 static const VMStateDescription vmstate_virtqueue = {
2660     .name = "virtqueue_state",
2661     .version_id = 1,
2662     .minimum_version_id = 1,
2663     .fields = (VMStateField[]) {
2664         VMSTATE_UINT64(vring.avail, struct VirtQueue),
2665         VMSTATE_UINT64(vring.used, struct VirtQueue),
2666         VMSTATE_END_OF_LIST()
2667     }
2668 };
2669 
2670 static const VMStateDescription vmstate_packed_virtqueue = {
2671     .name = "packed_virtqueue_state",
2672     .version_id = 1,
2673     .minimum_version_id = 1,
2674     .fields = (VMStateField[]) {
2675         VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2676         VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2677         VMSTATE_UINT16(used_idx, struct VirtQueue),
2678         VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2679         VMSTATE_UINT32(inuse, struct VirtQueue),
2680         VMSTATE_END_OF_LIST()
2681     }
2682 };
2683 
2684 static const VMStateDescription vmstate_virtio_virtqueues = {
2685     .name = "virtio/virtqueues",
2686     .version_id = 1,
2687     .minimum_version_id = 1,
2688     .needed = &virtio_virtqueue_needed,
2689     .fields = (VMStateField[]) {
2690         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2691                       VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2692         VMSTATE_END_OF_LIST()
2693     }
2694 };
2695 
2696 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2697     .name = "virtio/packed_virtqueues",
2698     .version_id = 1,
2699     .minimum_version_id = 1,
2700     .needed = &virtio_packed_virtqueue_needed,
2701     .fields = (VMStateField[]) {
2702         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2703                       VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2704         VMSTATE_END_OF_LIST()
2705     }
2706 };
2707 
2708 static const VMStateDescription vmstate_ringsize = {
2709     .name = "ringsize_state",
2710     .version_id = 1,
2711     .minimum_version_id = 1,
2712     .fields = (VMStateField[]) {
2713         VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2714         VMSTATE_END_OF_LIST()
2715     }
2716 };
2717 
2718 static const VMStateDescription vmstate_virtio_ringsize = {
2719     .name = "virtio/ringsize",
2720     .version_id = 1,
2721     .minimum_version_id = 1,
2722     .needed = &virtio_ringsize_needed,
2723     .fields = (VMStateField[]) {
2724         VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2725                       VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2726         VMSTATE_END_OF_LIST()
2727     }
2728 };
2729 
2730 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2731                            const VMStateField *field)
2732 {
2733     VirtIODevice *vdev = pv;
2734     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2735     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2736 
2737     if (!k->load_extra_state) {
2738         return -1;
2739     } else {
2740         return k->load_extra_state(qbus->parent, f);
2741     }
2742 }
2743 
2744 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2745                            const VMStateField *field, JSONWriter *vmdesc)
2746 {
2747     VirtIODevice *vdev = pv;
2748     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2749     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2750 
2751     k->save_extra_state(qbus->parent, f);
2752     return 0;
2753 }
2754 
2755 static const VMStateInfo vmstate_info_extra_state = {
2756     .name = "virtqueue_extra_state",
2757     .get = get_extra_state,
2758     .put = put_extra_state,
2759 };
2760 
2761 static const VMStateDescription vmstate_virtio_extra_state = {
2762     .name = "virtio/extra_state",
2763     .version_id = 1,
2764     .minimum_version_id = 1,
2765     .needed = &virtio_extra_state_needed,
2766     .fields = (VMStateField[]) {
2767         {
2768             .name         = "extra_state",
2769             .version_id   = 0,
2770             .field_exists = NULL,
2771             .size         = 0,
2772             .info         = &vmstate_info_extra_state,
2773             .flags        = VMS_SINGLE,
2774             .offset       = 0,
2775         },
2776         VMSTATE_END_OF_LIST()
2777     }
2778 };
2779 
2780 static const VMStateDescription vmstate_virtio_device_endian = {
2781     .name = "virtio/device_endian",
2782     .version_id = 1,
2783     .minimum_version_id = 1,
2784     .needed = &virtio_device_endian_needed,
2785     .fields = (VMStateField[]) {
2786         VMSTATE_UINT8(device_endian, VirtIODevice),
2787         VMSTATE_END_OF_LIST()
2788     }
2789 };
2790 
2791 static const VMStateDescription vmstate_virtio_64bit_features = {
2792     .name = "virtio/64bit_features",
2793     .version_id = 1,
2794     .minimum_version_id = 1,
2795     .needed = &virtio_64bit_features_needed,
2796     .fields = (VMStateField[]) {
2797         VMSTATE_UINT64(guest_features, VirtIODevice),
2798         VMSTATE_END_OF_LIST()
2799     }
2800 };
2801 
2802 static const VMStateDescription vmstate_virtio_broken = {
2803     .name = "virtio/broken",
2804     .version_id = 1,
2805     .minimum_version_id = 1,
2806     .needed = &virtio_broken_needed,
2807     .fields = (VMStateField[]) {
2808         VMSTATE_BOOL(broken, VirtIODevice),
2809         VMSTATE_END_OF_LIST()
2810     }
2811 };
2812 
2813 static const VMStateDescription vmstate_virtio_started = {
2814     .name = "virtio/started",
2815     .version_id = 1,
2816     .minimum_version_id = 1,
2817     .needed = &virtio_started_needed,
2818     .fields = (VMStateField[]) {
2819         VMSTATE_BOOL(started, VirtIODevice),
2820         VMSTATE_END_OF_LIST()
2821     }
2822 };
2823 
2824 static const VMStateDescription vmstate_virtio_disabled = {
2825     .name = "virtio/disabled",
2826     .version_id = 1,
2827     .minimum_version_id = 1,
2828     .needed = &virtio_disabled_needed,
2829     .fields = (VMStateField[]) {
2830         VMSTATE_BOOL(disabled, VirtIODevice),
2831         VMSTATE_END_OF_LIST()
2832     }
2833 };
2834 
2835 static const VMStateDescription vmstate_virtio = {
2836     .name = "virtio",
2837     .version_id = 1,
2838     .minimum_version_id = 1,
2839     .minimum_version_id_old = 1,
2840     .fields = (VMStateField[]) {
2841         VMSTATE_END_OF_LIST()
2842     },
2843     .subsections = (const VMStateDescription*[]) {
2844         &vmstate_virtio_device_endian,
2845         &vmstate_virtio_64bit_features,
2846         &vmstate_virtio_virtqueues,
2847         &vmstate_virtio_ringsize,
2848         &vmstate_virtio_broken,
2849         &vmstate_virtio_extra_state,
2850         &vmstate_virtio_started,
2851         &vmstate_virtio_packed_virtqueues,
2852         &vmstate_virtio_disabled,
2853         NULL
2854     }
2855 };
2856 
2857 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2858 {
2859     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2860     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2861     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2862     uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2863     int i;
2864 
2865     if (k->save_config) {
2866         k->save_config(qbus->parent, f);
2867     }
2868 
2869     qemu_put_8s(f, &vdev->status);
2870     qemu_put_8s(f, &vdev->isr);
2871     qemu_put_be16s(f, &vdev->queue_sel);
2872     qemu_put_be32s(f, &guest_features_lo);
2873     qemu_put_be32(f, vdev->config_len);
2874     qemu_put_buffer(f, vdev->config, vdev->config_len);
2875 
2876     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2877         if (vdev->vq[i].vring.num == 0)
2878             break;
2879     }
2880 
2881     qemu_put_be32(f, i);
2882 
2883     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2884         if (vdev->vq[i].vring.num == 0)
2885             break;
2886 
2887         qemu_put_be32(f, vdev->vq[i].vring.num);
2888         if (k->has_variable_vring_alignment) {
2889             qemu_put_be32(f, vdev->vq[i].vring.align);
2890         }
2891         /*
2892          * Save desc now, the rest of the ring addresses are saved in
2893          * subsections for VIRTIO-1 devices.
2894          */
2895         qemu_put_be64(f, vdev->vq[i].vring.desc);
2896         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2897         if (k->save_queue) {
2898             k->save_queue(qbus->parent, i, f);
2899         }
2900     }
2901 
2902     if (vdc->save != NULL) {
2903         vdc->save(vdev, f);
2904     }
2905 
2906     if (vdc->vmsd) {
2907         int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2908         if (ret) {
2909             return ret;
2910         }
2911     }
2912 
2913     /* Subsections */
2914     return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2915 }
2916 
2917 /* A wrapper for use as a VMState .put function */
2918 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2919                               const VMStateField *field, JSONWriter *vmdesc)
2920 {
2921     return virtio_save(VIRTIO_DEVICE(opaque), f);
2922 }
2923 
2924 /* A wrapper for use as a VMState .get function */
2925 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2926                              const VMStateField *field)
2927 {
2928     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2929     DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2930 
2931     return virtio_load(vdev, f, dc->vmsd->version_id);
2932 }
2933 
2934 const VMStateInfo  virtio_vmstate_info = {
2935     .name = "virtio",
2936     .get = virtio_device_get,
2937     .put = virtio_device_put,
2938 };
2939 
2940 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2941 {
2942     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2943     bool bad = (val & ~(vdev->host_features)) != 0;
2944 
2945     val &= vdev->host_features;
2946     if (k->set_features) {
2947         k->set_features(vdev, val);
2948     }
2949     vdev->guest_features = val;
2950     return bad ? -1 : 0;
2951 }
2952 
2953 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2954 {
2955     int ret;
2956     /*
2957      * The driver must not attempt to set features after feature negotiation
2958      * has finished.
2959      */
2960     if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2961         return -EINVAL;
2962     }
2963     ret = virtio_set_features_nocheck(vdev, val);
2964     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2965         /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
2966         int i;
2967         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2968             if (vdev->vq[i].vring.num != 0) {
2969                 virtio_init_region_cache(vdev, i);
2970             }
2971         }
2972     }
2973     if (!ret) {
2974         if (!virtio_device_started(vdev, vdev->status) &&
2975             !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2976             vdev->start_on_kick = true;
2977         }
2978     }
2979     return ret;
2980 }
2981 
2982 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2983                                       uint64_t host_features)
2984 {
2985     size_t config_size = 0;
2986     int i;
2987 
2988     for (i = 0; feature_sizes[i].flags != 0; i++) {
2989         if (host_features & feature_sizes[i].flags) {
2990             config_size = MAX(feature_sizes[i].end, config_size);
2991         }
2992     }
2993 
2994     return config_size;
2995 }
2996 
/*
 * Load the common virtio device state of @vdev from the migration stream @f.
 *
 * Reads, in order: transport config (if the bus provides load_config),
 * status, isr, queue_sel, the low 32 feature bits, the config space, the
 * queue count and per-queue fields, then the device-specific state (load
 * callback and/or vmsd, both given @version_id) and the vmstate_virtio
 * subsections.  Afterwards the features are validated against
 * host_features and the per-queue runtime state is rebuilt.
 *
 * Returns 0 on success; -1 or the non-zero value of a failing callback /
 * vmstate_load_state() on error.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    /* Reject a selected-queue index outside the queue array. */
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    /*
     * NOTE(review): config_len is a signed int32_t filled from
     * qemu_get_be32(); confirm how a stream value with the top bit set
     * behaves in the MIN() and loop comparisons below.
     */
    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    /* Per-queue fields, in the same order virtio_save() writes them. */
    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        /* A queue without a descriptor address must have index 0. */
        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    /* No subsection set the endianness: fall back to the default. */
    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    /* Same start_on_kick rule as applied in virtio_set_features(). */
    if (!virtio_device_started(vdev, vdev->status) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->start_on_kick = true;
    }

    RCU_READ_LOCK_GUARD();
    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;

            /*
             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
             * only the region cache needs to be set up.  Legacy devices need
             * to calculate used and avail ring addresses based on the desc
             * address.
             */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
                virtio_init_region_cache(vdev, i);
            } else {
                virtio_queue_update_rings(vdev, i);
            }

            /* Packed rings: only the shadow avail state is re-derived. */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
                vdev->vq[i].shadow_avail_wrap_counter =
                                        vdev->vq[i].last_avail_wrap_counter;
                continue;
            }

            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                vdev->vq[i].used_idx = 0;
                vdev->vq[i].shadow_avail_idx = 0;
                vdev->vq[i].inuse = 0;
                continue;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                return -1;
            }
        }
    }

    /* Give the device class a final hook once everything is loaded. */
    if (vdc->post_load) {
        ret = vdc->post_load(vdev);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
3202 
/*
 * Remove the VM change state handler stored in vdev->vmstate (the one
 * registered by virtio_init()).
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
}
3207 
3208 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3209 {
3210     VirtIODevice *vdev = opaque;
3211     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3212     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3213     bool backend_run = running && virtio_device_started(vdev, vdev->status);
3214     vdev->vm_running = running;
3215 
3216     if (backend_run) {
3217         virtio_set_status(vdev, vdev->status);
3218     }
3219 
3220     if (k->vmstate_change) {
3221         k->vmstate_change(qbus->parent, backend_run);
3222     }
3223 
3224     if (!backend_run) {
3225         virtio_set_status(vdev, vdev->status);
3226     }
3227 }
3228 
3229 void virtio_instance_init_common(Object *proxy_obj, void *data,
3230                                  size_t vdev_size, const char *vdev_name)
3231 {
3232     DeviceState *vdev = data;
3233 
3234     object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3235                                        vdev_size, vdev_name, &error_abort,
3236                                        NULL);
3237     qdev_alias_all_properties(vdev, proxy_obj);
3238 }
3239 
3240 void virtio_init(VirtIODevice *vdev, const char *name,
3241                  uint16_t device_id, size_t config_size)
3242 {
3243     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3244     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3245     int i;
3246     int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3247 
3248     if (nvectors) {
3249         vdev->vector_queues =
3250             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3251     }
3252 
3253     vdev->start_on_kick = false;
3254     vdev->started = false;
3255     vdev->device_id = device_id;
3256     vdev->status = 0;
3257     qatomic_set(&vdev->isr, 0);
3258     vdev->queue_sel = 0;
3259     vdev->config_vector = VIRTIO_NO_VECTOR;
3260     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3261     vdev->vm_running = runstate_is_running();
3262     vdev->broken = false;
3263     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3264         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3265         vdev->vq[i].vdev = vdev;
3266         vdev->vq[i].queue_index = i;
3267         vdev->vq[i].host_notifier_enabled = false;
3268     }
3269 
3270     vdev->name = name;
3271     vdev->config_len = config_size;
3272     if (vdev->config_len) {
3273         vdev->config = g_malloc0(config_size);
3274     } else {
3275         vdev->config = NULL;
3276     }
3277     vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3278             virtio_vmstate_change, vdev);
3279     vdev->device_endian = virtio_default_endian();
3280     vdev->use_guest_notifier_mask = true;
3281 }
3282 
3283 /*
3284  * Only devices that have already been around prior to defining the virtio
3285  * standard support legacy mode; this includes devices not specified in the
3286  * standard. All newer devices conform to the virtio standard only.
3287  */
3288 bool virtio_legacy_allowed(VirtIODevice *vdev)
3289 {
3290     switch (vdev->device_id) {
3291     case VIRTIO_ID_NET:
3292     case VIRTIO_ID_BLOCK:
3293     case VIRTIO_ID_CONSOLE:
3294     case VIRTIO_ID_RNG:
3295     case VIRTIO_ID_BALLOON:
3296     case VIRTIO_ID_RPMSG:
3297     case VIRTIO_ID_SCSI:
3298     case VIRTIO_ID_9P:
3299     case VIRTIO_ID_RPROC_SERIAL:
3300     case VIRTIO_ID_CAIF:
3301         return true;
3302     default:
3303         return false;
3304     }
3305 }
3306 
3307 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3308 {
3309     return vdev->disable_legacy_check;
3310 }
3311 
3312 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3313 {
3314     return vdev->vq[n].vring.desc;
3315 }
3316 
3317 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3318 {
3319     return virtio_queue_get_desc_addr(vdev, n) != 0;
3320 }
3321 
3322 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3323 {
3324     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3325     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3326 
3327     if (k->queue_enabled) {
3328         return k->queue_enabled(qbus->parent, n);
3329     }
3330     return virtio_queue_enabled_legacy(vdev, n);
3331 }
3332 
3333 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3334 {
3335     return vdev->vq[n].vring.avail;
3336 }
3337 
3338 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3339 {
3340     return vdev->vq[n].vring.used;
3341 }
3342 
3343 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3344 {
3345     return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3346 }
3347 
3348 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3349 {
3350     int s;
3351 
3352     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3353         return sizeof(struct VRingPackedDescEvent);
3354     }
3355 
3356     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3357     return offsetof(VRingAvail, ring) +
3358         sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3359 }
3360 
3361 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3362 {
3363     int s;
3364 
3365     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3366         return sizeof(struct VRingPackedDescEvent);
3367     }
3368 
3369     s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3370     return offsetof(VRingUsed, ring) +
3371         sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3372 }
3373 
3374 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3375                                                            int n)
3376 {
3377     unsigned int avail, used;
3378 
3379     avail = vdev->vq[n].last_avail_idx;
3380     avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3381 
3382     used = vdev->vq[n].used_idx;
3383     used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3384 
3385     return avail | used << 16;
3386 }
3387 
3388 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3389                                                       int n)
3390 {
3391     return vdev->vq[n].last_avail_idx;
3392 }
3393 
3394 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3395 {
3396     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3397         return virtio_queue_packed_get_last_avail_idx(vdev, n);
3398     } else {
3399         return virtio_queue_split_get_last_avail_idx(vdev, n);
3400     }
3401 }
3402 
3403 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3404                                                    int n, unsigned int idx)
3405 {
3406     struct VirtQueue *vq = &vdev->vq[n];
3407 
3408     vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3409     vq->last_avail_wrap_counter =
3410         vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3411     idx >>= 16;
3412     vq->used_idx = idx & 0x7ffff;
3413     vq->used_wrap_counter = !!(idx & 0x8000);
3414 }
3415 
3416 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3417                                                   int n, unsigned int idx)
3418 {
3419         vdev->vq[n].last_avail_idx = idx;
3420         vdev->vq[n].shadow_avail_idx = idx;
3421 }
3422 
3423 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3424                                      unsigned int idx)
3425 {
3426     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3427         virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3428     } else {
3429         virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3430     }
3431 }
3432 
3433 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3434                                                        int n)
3435 {
3436     /* We don't have a reference like avail idx in shared memory */
3437     return;
3438 }
3439 
3440 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3441                                                       int n)
3442 {
3443     RCU_READ_LOCK_GUARD();
3444     if (vdev->vq[n].vring.desc) {
3445         vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3446         vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3447     }
3448 }
3449 
3450 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3451 {
3452     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3453         virtio_queue_packed_restore_last_avail_idx(vdev, n);
3454     } else {
3455         virtio_queue_split_restore_last_avail_idx(vdev, n);
3456     }
3457 }
3458 
3459 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3460 {
3461     /* used idx was updated through set_last_avail_idx() */
3462     return;
3463 }
3464 
3465 static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
3466 {
3467     RCU_READ_LOCK_GUARD();
3468     if (vdev->vq[n].vring.desc) {
3469         vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3470     }
3471 }
3472 
3473 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3474 {
3475     if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3476         return virtio_queue_packed_update_used_idx(vdev, n);
3477     } else {
3478         return virtio_split_packed_update_used_idx(vdev, n);
3479     }
3480 }
3481 
/* Clear the signalled_used_valid flag of queue @n. */
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}
3486 
3487 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3488 {
3489     return vdev->vq + n;
3490 }
3491 
3492 uint16_t virtio_get_queue_index(VirtQueue *vq)
3493 {
3494     return vq->queue_index;
3495 }
3496 
3497 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3498 {
3499     VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3500     if (event_notifier_test_and_clear(n)) {
3501         virtio_irq(vq);
3502     }
3503 }
3504 
3505 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3506                                                 bool with_irqfd)
3507 {
3508     if (assign && !with_irqfd) {
3509         event_notifier_set_handler(&vq->guest_notifier,
3510                                    virtio_queue_guest_notifier_read);
3511     } else {
3512         event_notifier_set_handler(&vq->guest_notifier, NULL);
3513     }
3514     if (!assign) {
3515         /* Test and clear notifier before closing it,
3516          * in case poll callback didn't have time to run. */
3517         virtio_queue_guest_notifier_read(&vq->guest_notifier);
3518     }
3519 }
3520 
3521 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3522 {
3523     return &vq->guest_notifier;
3524 }
3525 
3526 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3527 {
3528     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3529     if (event_notifier_test_and_clear(n)) {
3530         virtio_queue_notify_aio_vq(vq);
3531     }
3532 }
3533 
3534 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3535 {
3536     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3537 
3538     virtio_queue_set_notification(vq, 0);
3539 }
3540 
3541 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3542 {
3543     EventNotifier *n = opaque;
3544     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3545 
3546     if (!vq->vring.desc || virtio_queue_empty(vq)) {
3547         return false;
3548     }
3549 
3550     return virtio_queue_notify_aio_vq(vq);
3551 }
3552 
3553 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3554 {
3555     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3556 
3557     /* Caller polls once more after this to catch requests that race with us */
3558     virtio_queue_set_notification(vq, 1);
3559 }
3560 
3561 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3562                                                 VirtIOHandleAIOOutput handle_output)
3563 {
3564     if (handle_output) {
3565         vq->handle_aio_output = handle_output;
3566         aio_set_event_notifier(ctx, &vq->host_notifier, true,
3567                                virtio_queue_host_notifier_aio_read,
3568                                virtio_queue_host_notifier_aio_poll);
3569         aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3570                                     virtio_queue_host_notifier_aio_poll_begin,
3571                                     virtio_queue_host_notifier_aio_poll_end);
3572     } else {
3573         aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3574         /* Test and clear notifier before after disabling event,
3575          * in case poll callback didn't have time to run. */
3576         virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3577         vq->handle_aio_output = NULL;
3578     }
3579 }
3580 
3581 void virtio_queue_host_notifier_read(EventNotifier *n)
3582 {
3583     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3584     if (event_notifier_test_and_clear(n)) {
3585         virtio_queue_notify_vq(vq);
3586     }
3587 }
3588 
3589 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3590 {
3591     return &vq->host_notifier;
3592 }
3593 
/* Record in @vq whether its host notifier is enabled. */
void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}
3598 
3599 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3600                                       MemoryRegion *mr, bool assign)
3601 {
3602     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3603     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3604 
3605     if (k->set_host_notifier_mr) {
3606         return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3607     }
3608 
3609     return -1;
3610 }
3611 
/*
 * Replace vdev->bus_name with a private copy of @bus_name.  The previous
 * name is freed first, so @bus_name must not alias it.
 */
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}
3617 
3618 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3619 {
3620     va_list ap;
3621 
3622     va_start(ap, fmt);
3623     error_vreport(fmt, ap);
3624     va_end(ap);
3625 
3626     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3627         vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3628         virtio_notify_config(vdev);
3629     }
3630 
3631     vdev->broken = true;
3632 }
3633 
3634 static void virtio_memory_listener_commit(MemoryListener *listener)
3635 {
3636     VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3637     int i;
3638 
3639     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3640         if (vdev->vq[i].vring.num == 0) {
3641             break;
3642         }
3643         virtio_init_region_cache(vdev, i);
3644     }
3645 }
3646 
3647 static void virtio_device_realize(DeviceState *dev, Error **errp)
3648 {
3649     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3650     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3651     Error *err = NULL;
3652 
3653     /* Devices should either use vmsd or the load/save methods */
3654     assert(!vdc->vmsd || !vdc->load);
3655 
3656     if (vdc->realize != NULL) {
3657         vdc->realize(dev, &err);
3658         if (err != NULL) {
3659             error_propagate(errp, err);
3660             return;
3661         }
3662     }
3663 
3664     virtio_bus_device_plugged(vdev, &err);
3665     if (err != NULL) {
3666         error_propagate(errp, err);
3667         vdc->unrealize(dev);
3668         return;
3669     }
3670 
3671     vdev->listener.commit = virtio_memory_listener_commit;
3672     memory_listener_register(&vdev->listener, vdev->dma_as);
3673 }
3674 
3675 static void virtio_device_unrealize(DeviceState *dev)
3676 {
3677     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3678     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3679 
3680     memory_listener_unregister(&vdev->listener);
3681     virtio_bus_device_unplugged(vdev);
3682 
3683     if (vdc->unrealize != NULL) {
3684         vdc->unrealize(dev);
3685     }
3686 
3687     g_free(vdev->bus_name);
3688     vdev->bus_name = NULL;
3689 }
3690 
3691 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3692 {
3693     int i;
3694     if (!vdev->vq) {
3695         return;
3696     }
3697 
3698     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3699         if (vdev->vq[i].vring.num == 0) {
3700             break;
3701         }
3702         virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3703     }
3704     g_free(vdev->vq);
3705 }
3706 
3707 static void virtio_device_instance_finalize(Object *obj)
3708 {
3709     VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3710 
3711     virtio_device_free_virtqueues(vdev);
3712 
3713     g_free(vdev->config);
3714     g_free(vdev->vector_queues);
3715 }
3716 
/*
 * qdev properties common to every virtio device; installed on the class by
 * virtio_device_class_init().
 */
static Property virtio_properties[] = {
    /* Common feature-bit properties, stored in host_features. */
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    /* Boolean knobs; the last argument is the default value. */
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
3725 
/*
 * Default start_ioeventfd implementation.
 *
 * Assigns a host notifier to every queue with a non-zero size and installs
 * virtio_queue_host_notifier_read() as its handler, then sets each of those
 * notifiers once.  All of this happens inside a single memory region
 * transaction.
 *
 * Returns 0 on success.  If assigning a notifier fails, the queues handled
 * so far are rolled back and the negative value returned by
 * virtio_bus_set_host_notifier() is returned.
 */
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int i, n, r, err;

    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        /* Skip queues that are not set up (size 0). */
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    memory_region_transaction_commit();
    return 0;

assign_error:
    i = n; /* save n for a second iteration after transaction is committed. */
    /* Undo, in reverse order, every queue before the one that failed. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    memory_region_transaction_commit();

    /* Notifier cleanup is done only after the transaction was committed. */
    while (--i >= 0) {
        if (!virtio_queue_get_num(vdev, i)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, i);
    }
    return err;
}
3779 
3780 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3781 {
3782     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3783     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3784 
3785     return virtio_bus_start_ioeventfd(vbus);
3786 }
3787 
3788 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3789 {
3790     VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3791     int n, r;
3792 
3793     memory_region_transaction_begin();
3794     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3795         VirtQueue *vq = &vdev->vq[n];
3796 
3797         if (!virtio_queue_get_num(vdev, n)) {
3798             continue;
3799         }
3800         event_notifier_set_handler(&vq->host_notifier, NULL);
3801         r = virtio_bus_set_host_notifier(qbus, n, false);
3802         assert(r >= 0);
3803     }
3804     memory_region_transaction_commit();
3805 
3806     for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3807         if (!virtio_queue_get_num(vdev, n)) {
3808             continue;
3809         }
3810         virtio_bus_cleanup_host_notifier(qbus, n);
3811     }
3812 }
3813 
3814 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3815 {
3816     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3817     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3818 
3819     return virtio_bus_grab_ioeventfd(vbus);
3820 }
3821 
3822 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3823 {
3824     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3825     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3826 
3827     virtio_bus_release_ioeventfd(vbus);
3828 }
3829 
3830 static void virtio_device_class_init(ObjectClass *klass, void *data)
3831 {
3832     /* Set the default value here. */
3833     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3834     DeviceClass *dc = DEVICE_CLASS(klass);
3835 
3836     dc->realize = virtio_device_realize;
3837     dc->unrealize = virtio_device_unrealize;
3838     dc->bus_type = TYPE_VIRTIO_BUS;
3839     device_class_set_props(dc, virtio_properties);
3840     vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3841     vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3842 
3843     vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3844 }
3845 
3846 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3847 {
3848     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3849     VirtioBusState *vbus = VIRTIO_BUS(qbus);
3850 
3851     return virtio_bus_ioeventfd_enabled(vbus);
3852 }
3853 
3854 static const TypeInfo virtio_device_info = {
3855     .name = TYPE_VIRTIO_DEVICE,
3856     .parent = TYPE_DEVICE,
3857     .instance_size = sizeof(VirtIODevice),
3858     .class_init = virtio_device_class_init,
3859     .instance_finalize = virtio_device_instance_finalize,
3860     .abstract = true,
3861     .class_size = sizeof(VirtioDeviceClass),
3862 };
3863 
/* Register the abstract virtio device type with QOM. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

/* Hook the registration function in through QEMU's type_init() mechanism. */
type_init(virtio_register_types)
3870