/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

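/*
 * Layout of a legacy (split) virtqueue in guest memory, as mirrored by
 * the structures below: an array of descriptors, followed by the avail
 * ring, followed (after padding to the vring alignment) by the used
 * ring.
 */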
typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
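/* Recompute the avail and used ring addresses from the descriptor table
 * address: avail immediately follows the descriptor array, and used is
 * placed after avail, rounded up to the vring alignment. */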
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
                       (void *)uelem, sizeof(VRingUsedElem));
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

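/* Enable or disable guest->host notifications for this queue.  With
 * VIRTIO_RING_F_EVENT_IDX this is done by publishing an avail event
 * index; without it, the VRING_USED_F_NO_NOTIFY flag is toggled in the
 * used ring. */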
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
    }
}

void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                       unsigned int len)
{
    vq->last_avail_idx--;
    virtqueue_unmap_sg(vq, elem, len);
}

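/* Unmap an element and write it into the used ring at offset idx past
 * the current used_idx, without publishing a new used index; pair with
 * virtqueue_flush(), or use virtqueue_push() for the common
 * fill-one-then-flush case. */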
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                         hwaddr desc_pa, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return max;
    }

    /* Check they're not leading us off end of descriptors. */
    next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    vring_desc_read(vdev, desc, desc_pa, next);
    return next;
}

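/* Walk all available descriptor chains (following indirect tables)
 * without popping them, totalling the writable (in) and readable (out)
 * buffer sizes; counting stops early once both caller-supplied limits
 * have been reached. */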
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            error_report("virtio: too many write descriptors in indirect table");
            exit(1);
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    *p_num_sg = num_sg;
}

static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg, unsigned int max_size,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

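/* (Re)map an element's guest-physical addresses into host iovecs, e.g.
 * after an element has been loaded from the migration stream. */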
void virtqueue_map(VirtQueueElement *elem)
{
    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
                        VIRTQUEUE_MAX_SIZE, 1);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
                        VIRTQUEUE_MAX_SIZE, 0);
}

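/* Allocate an element as one contiguous block: the caller's structure
 * of size sz (which must embed a VirtQueueElement at offset 0) is
 * followed by the in_addr[], out_addr[], in_sg[] and out_sg[] arrays,
 * each suitably aligned, so a single g_free() releases everything. */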
void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

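/* Pop the next available element and map its buffers into host memory.
 * Returns NULL if the queue is empty; otherwise returns an element
 * allocated with virtqueue_alloc_element(sz, ...) that the caller must
 * eventually return with virtqueue_push()/virtqueue_fill() (or
 * virtqueue_discard()) and then g_free(). */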
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;

    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are no input or output buffers. */
    out_num = in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        if (desc.flags & VRING_DESC_F_WRITE) {
            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
        } else {
            if (in_num) {
                error_report("Incorrect order for descriptors");
                exit(1);
            }
            virtqueue_map_desc(&out_num, addr, iov,
                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;
}

/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout.  */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}

int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}

int virtio_queue_get_id(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;
    assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_QUEUE_MAX]);
    return vq - &vdev->vq[0];
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

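/* Claim the first unused slot in vdev->vq[] for a new queue.  Typical
 * usage from a device's realize hook, as a sketch (my_handle_tx and
 * MY_QUEUE_SIZE are hypothetical names, not part of this file):
 *
 *     vq = virtio_add_queue(vdev, MY_QUEUE_SIZE, my_handle_tx);
 *
 * queue_size must not exceed VIRTQUEUE_MAX_SIZE.
 */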
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
    vdev->vq[n].vring.num_default = 0;
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

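/* Decide whether the guest needs an interrupt for this queue.  Without
 * VIRTIO_RING_F_EVENT_IDX this is governed by the guest-controlled
 * VRING_AVAIL_F_NO_INTERRUPT flag; with it, vring_need_event()
 * implements the standard check
 *
 *     (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * i.e. notify only if the used event index was crossed since the last
 * value we signalled. */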
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    vdev->isr |= 0x03;
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}

static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_extra_state,
        NULL
    }
};

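/* Save device state in the traditional hand-rolled format: transport
 * config, status/isr/queue_sel, the low 32 feature bits, the config
 * space, then per-queue state; everything newer travels in the
 * vmstate_virtio subsections appended at the end. */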
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }
    return virtio_set_features_nocheck(vdev, val);
}

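/* Counterpart of virtio_save(): restore the hand-rolled fields first,
 * then the vmstate subsections, and finally sanity-check the loaded
 * features and ring indices against the rings in guest memory. */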
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret) {
            return ret;
        }
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].vring.desc) {
            /* XXX virtio-1 devices */
            virtio_queue_update_rings(vdev, i);
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret) {
                return ret;
            }
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
        }
    }

    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
    g_free(vdev->vector_queues);
}

static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize(vdev, vdev_size, vdev_name);
    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
    object_unref(OBJECT(vdev));
    qdev_alias_all_properties(vdev, proxy_obj);
}

void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                bool assign, bool set_handler)
{
    if (assign && set_handler) {
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
                               virtio_queue_host_notifier_read);
    } else {
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    if (assign && set_handler) {
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)