xref: /qemu/hw/virtio/vhost-shadow-virtqueue.c (revision 100890f7)
110857ec0SEugenio Pérez /*
210857ec0SEugenio Pérez  * vhost shadow virtqueue
310857ec0SEugenio Pérez  *
410857ec0SEugenio Pérez  * SPDX-FileCopyrightText: Red Hat, Inc. 2021
510857ec0SEugenio Pérez  * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
610857ec0SEugenio Pérez  *
710857ec0SEugenio Pérez  * SPDX-License-Identifier: GPL-2.0-or-later
810857ec0SEugenio Pérez  */
910857ec0SEugenio Pérez 
1010857ec0SEugenio Pérez #include "qemu/osdep.h"
1110857ec0SEugenio Pérez #include "hw/virtio/vhost-shadow-virtqueue.h"
1210857ec0SEugenio Pérez 
1310857ec0SEugenio Pérez #include "qemu/error-report.h"
144725a418SEugenio Pérez #include "qapi/error.h"
15dff4426fSEugenio Pérez #include "qemu/main-loop.h"
16*100890f7SEugenio Pérez #include "qemu/log.h"
17*100890f7SEugenio Pérez #include "qemu/memalign.h"
18dff4426fSEugenio Pérez #include "linux-headers/linux/vhost.h"
19dff4426fSEugenio Pérez 
20dff4426fSEugenio Pérez /**
214725a418SEugenio Pérez  * Validate the transport device features that both guests can use with the SVQ
224725a418SEugenio Pérez  * and SVQs can use with the device.
234725a418SEugenio Pérez  *
244725a418SEugenio Pérez  * @dev_features: The features
254725a418SEugenio Pérez  * @errp: Error pointer
264725a418SEugenio Pérez  */
274725a418SEugenio Pérez bool vhost_svq_valid_features(uint64_t features, Error **errp)
284725a418SEugenio Pérez {
294725a418SEugenio Pérez     bool ok = true;
304725a418SEugenio Pérez     uint64_t svq_features = features;
314725a418SEugenio Pérez 
324725a418SEugenio Pérez     for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
334725a418SEugenio Pérez          ++b) {
344725a418SEugenio Pérez         switch (b) {
354725a418SEugenio Pérez         case VIRTIO_F_ANY_LAYOUT:
364725a418SEugenio Pérez             continue;
374725a418SEugenio Pérez 
384725a418SEugenio Pérez         case VIRTIO_F_ACCESS_PLATFORM:
394725a418SEugenio Pérez             /* SVQ trust in the host's IOMMU to translate addresses */
404725a418SEugenio Pérez         case VIRTIO_F_VERSION_1:
414725a418SEugenio Pérez             /* SVQ trust that the guest vring is little endian */
424725a418SEugenio Pérez             if (!(svq_features & BIT_ULL(b))) {
434725a418SEugenio Pérez                 svq_features |= BIT_ULL(b);
444725a418SEugenio Pérez                 ok = false;
454725a418SEugenio Pérez             }
464725a418SEugenio Pérez             continue;
474725a418SEugenio Pérez 
484725a418SEugenio Pérez         default:
494725a418SEugenio Pérez             if (svq_features & BIT_ULL(b)) {
504725a418SEugenio Pérez                 svq_features &= ~BIT_ULL(b);
514725a418SEugenio Pérez                 ok = false;
524725a418SEugenio Pérez             }
534725a418SEugenio Pérez         }
544725a418SEugenio Pérez     }
554725a418SEugenio Pérez 
564725a418SEugenio Pérez     if (!ok) {
574725a418SEugenio Pérez         error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
584725a418SEugenio Pérez                          ", ok: 0x%"PRIx64, features, svq_features);
594725a418SEugenio Pérez     }
604725a418SEugenio Pérez     return ok;
614725a418SEugenio Pérez }
624725a418SEugenio Pérez 
634725a418SEugenio Pérez /**
64*100890f7SEugenio Pérez  * Number of descriptors that the SVQ can make available from the guest.
65dff4426fSEugenio Pérez  *
66*100890f7SEugenio Pérez  * @svq: The svq
67dff4426fSEugenio Pérez  */
68*100890f7SEugenio Pérez static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
69dff4426fSEugenio Pérez {
70*100890f7SEugenio Pérez     return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
71*100890f7SEugenio Pérez }
72*100890f7SEugenio Pérez 
73*100890f7SEugenio Pérez static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
74*100890f7SEugenio Pérez                                     const struct iovec *iovec, size_t num,
75*100890f7SEugenio Pérez                                     bool more_descs, bool write)
76*100890f7SEugenio Pérez {
77*100890f7SEugenio Pérez     uint16_t i = svq->free_head, last = svq->free_head;
78*100890f7SEugenio Pérez     unsigned n;
79*100890f7SEugenio Pérez     uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
80*100890f7SEugenio Pérez     vring_desc_t *descs = svq->vring.desc;
81*100890f7SEugenio Pérez 
82*100890f7SEugenio Pérez     if (num == 0) {
83*100890f7SEugenio Pérez         return;
84*100890f7SEugenio Pérez     }
85*100890f7SEugenio Pérez 
86*100890f7SEugenio Pérez     for (n = 0; n < num; n++) {
87*100890f7SEugenio Pérez         if (more_descs || (n + 1 < num)) {
88*100890f7SEugenio Pérez             descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
89*100890f7SEugenio Pérez         } else {
90*100890f7SEugenio Pérez             descs[i].flags = flags;
91*100890f7SEugenio Pérez         }
92*100890f7SEugenio Pérez         descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
93*100890f7SEugenio Pérez         descs[i].len = cpu_to_le32(iovec[n].iov_len);
94*100890f7SEugenio Pérez 
95*100890f7SEugenio Pérez         last = i;
96*100890f7SEugenio Pérez         i = cpu_to_le16(descs[i].next);
97*100890f7SEugenio Pérez     }
98*100890f7SEugenio Pérez 
99*100890f7SEugenio Pérez     svq->free_head = le16_to_cpu(descs[last].next);
100*100890f7SEugenio Pérez }
101*100890f7SEugenio Pérez 
/**
 * Write a guest element into the SVQ vring and expose it to the device.
 *
 * @svq: The shadow virtqueue
 * @elem: The guest element, with out_sg/in_sg already in qemu vaddr
 * @head: Output argument, head index of the descriptor chain written
 *
 * Returns false if the element carries no descriptors at all (guest error),
 * true otherwise. The caller must ensure there are enough free slots before
 * calling (see vhost_svq_available_slots).
 */
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                VirtQueueElement *elem, unsigned *head)
{
    unsigned avail_idx;
    vring_avail_t *avail = svq->vring.avail;

    /* The chain will start at the first free descriptor */
    *head = svq->free_head;

    /* We need some descriptors here */
    if (unlikely(!elem->out_num && !elem->in_num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Guest provided element with no descriptors");
        return false;
    }

    /* Device-readable part first, chained to the device-writable part */
    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
                            false);
    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);

    /*
     * Put the entry in the available array (but don't update avail->idx until
     * they do sync).
     */
    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
    avail->ring[avail_idx] = cpu_to_le16(*head);
    svq->shadow_avail_idx++;

    /* Update the avail index after write the descriptor */
    smp_wmb();
    avail->idx = cpu_to_le16(svq->shadow_avail_idx);

    return true;
}
135*100890f7SEugenio Pérez 
136*100890f7SEugenio Pérez static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
137*100890f7SEugenio Pérez {
138*100890f7SEugenio Pérez     unsigned qemu_head;
139*100890f7SEugenio Pérez     bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
140*100890f7SEugenio Pérez     if (unlikely(!ok)) {
141*100890f7SEugenio Pérez         return false;
142*100890f7SEugenio Pérez     }
143*100890f7SEugenio Pérez 
144*100890f7SEugenio Pérez     svq->ring_id_maps[qemu_head] = elem;
145*100890f7SEugenio Pérez     return true;
146*100890f7SEugenio Pérez }
147*100890f7SEugenio Pérez 
148*100890f7SEugenio Pérez static void vhost_svq_kick(VhostShadowVirtqueue *svq)
149*100890f7SEugenio Pérez {
150*100890f7SEugenio Pérez     /*
151*100890f7SEugenio Pérez      * We need to expose the available array entries before checking the used
152*100890f7SEugenio Pérez      * flags
153*100890f7SEugenio Pérez      */
154*100890f7SEugenio Pérez     smp_mb();
155*100890f7SEugenio Pérez     if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
156*100890f7SEugenio Pérez         return;
157*100890f7SEugenio Pérez     }
158*100890f7SEugenio Pérez 
159dff4426fSEugenio Pérez     event_notifier_set(&svq->hdev_kick);
160dff4426fSEugenio Pérez }
161dff4426fSEugenio Pérez 
/**
 * Forward available buffers.
 *
 * @svq: Shadow VirtQueue
 *
 * Note that this function does not guarantee that all guest's available
 * buffers are available to the device in SVQ avail ring. The guest may have
 * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
 * qemu vaddr.
 *
 * If that happens, guest's kick notifications will be disabled until the
 * device uses some buffers.
 */
static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
{
    /* Clear event notifier */
    event_notifier_test_and_clear(&svq->svq_kick);

    /* Forward to the device as many available buffers as possible */
    do {
        /*
         * Disable guest notifications while draining; the outer loop
         * re-checks the queue after re-enabling to close the race with a
         * guest that adds buffers in between.
         */
        virtio_queue_set_notification(svq->vq, false);

        while (true) {
            VirtQueueElement *elem;
            bool ok;

            /* A previously deferred element takes priority over new pops */
            if (svq->next_guest_avail_elem) {
                elem = g_steal_pointer(&svq->next_guest_avail_elem);
            } else {
                elem = virtqueue_pop(svq->vq, sizeof(*elem));
            }

            if (!elem) {
                break;
            }

            if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
                /*
                 * This condition is possible since a contiguous buffer in GPA
                 * does not imply a contiguous buffer in qemu's VA
                 * scatter-gather segments. If that happens, the buffer exposed
                 * to the device needs to be a chain of descriptors at this
                 * moment.
                 *
                 * SVQ cannot hold more available buffers if we are here:
                 * queue the current guest descriptor and ignore further kicks
                 * until some elements are used.
                 */
                svq->next_guest_avail_elem = elem;
                return;
            }

            ok = vhost_svq_add(svq, elem);
            if (unlikely(!ok)) {
                /* VQ is broken, just return and ignore any other kicks */
                return;
            }
            vhost_svq_kick(svq);
        }

        virtio_queue_set_notification(svq->vq, true);
    } while (!virtio_queue_empty(svq->vq));
}
225*100890f7SEugenio Pérez 
226*100890f7SEugenio Pérez /**
227*100890f7SEugenio Pérez  * Handle guest's kick.
228*100890f7SEugenio Pérez  *
229*100890f7SEugenio Pérez  * @n: guest kick event notifier, the one that guest set to notify svq.
230*100890f7SEugenio Pérez  */
231*100890f7SEugenio Pérez static void vhost_handle_guest_kick_notifier(EventNotifier *n)
232*100890f7SEugenio Pérez {
233*100890f7SEugenio Pérez     VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
234*100890f7SEugenio Pérez     event_notifier_test_and_clear(n);
235*100890f7SEugenio Pérez     vhost_handle_guest_kick(svq);
236*100890f7SEugenio Pérez }
237*100890f7SEugenio Pérez 
238*100890f7SEugenio Pérez static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
239*100890f7SEugenio Pérez {
240*100890f7SEugenio Pérez     if (svq->last_used_idx != svq->shadow_used_idx) {
241*100890f7SEugenio Pérez         return true;
242*100890f7SEugenio Pérez     }
243*100890f7SEugenio Pérez 
244*100890f7SEugenio Pérez     svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
245*100890f7SEugenio Pérez 
246*100890f7SEugenio Pérez     return svq->last_used_idx != svq->shadow_used_idx;
247*100890f7SEugenio Pérez }
248*100890f7SEugenio Pérez 
249*100890f7SEugenio Pérez /**
250*100890f7SEugenio Pérez  * Enable vhost device calls after disable them.
251*100890f7SEugenio Pérez  *
252*100890f7SEugenio Pérez  * @svq: The svq
253*100890f7SEugenio Pérez  *
254*100890f7SEugenio Pérez  * It returns false if there are pending used buffers from the vhost device,
255*100890f7SEugenio Pérez  * avoiding the possible races between SVQ checking for more work and enabling
256*100890f7SEugenio Pérez  * callbacks. True if SVQ used vring has no more pending buffers.
257*100890f7SEugenio Pérez  */
258*100890f7SEugenio Pérez static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
259*100890f7SEugenio Pérez {
260*100890f7SEugenio Pérez     svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
261*100890f7SEugenio Pérez     /* Make sure the flag is written before the read of used_idx */
262*100890f7SEugenio Pérez     smp_mb();
263*100890f7SEugenio Pérez     return !vhost_svq_more_used(svq);
264*100890f7SEugenio Pérez }
265*100890f7SEugenio Pérez 
266*100890f7SEugenio Pérez static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
267*100890f7SEugenio Pérez {
268*100890f7SEugenio Pérez     svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
269*100890f7SEugenio Pérez }
270*100890f7SEugenio Pérez 
271*100890f7SEugenio Pérez static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
272*100890f7SEugenio Pérez                                            uint32_t *len)
273*100890f7SEugenio Pérez {
274*100890f7SEugenio Pérez     vring_desc_t *descs = svq->vring.desc;
275*100890f7SEugenio Pérez     const vring_used_t *used = svq->vring.used;
276*100890f7SEugenio Pérez     vring_used_elem_t used_elem;
277*100890f7SEugenio Pérez     uint16_t last_used;
278*100890f7SEugenio Pérez 
279*100890f7SEugenio Pérez     if (!vhost_svq_more_used(svq)) {
280*100890f7SEugenio Pérez         return NULL;
281*100890f7SEugenio Pérez     }
282*100890f7SEugenio Pérez 
283*100890f7SEugenio Pérez     /* Only get used array entries after they have been exposed by dev */
284*100890f7SEugenio Pérez     smp_rmb();
285*100890f7SEugenio Pérez     last_used = svq->last_used_idx & (svq->vring.num - 1);
286*100890f7SEugenio Pérez     used_elem.id = le32_to_cpu(used->ring[last_used].id);
287*100890f7SEugenio Pérez     used_elem.len = le32_to_cpu(used->ring[last_used].len);
288*100890f7SEugenio Pérez 
289*100890f7SEugenio Pérez     svq->last_used_idx++;
290*100890f7SEugenio Pérez     if (unlikely(used_elem.id >= svq->vring.num)) {
291*100890f7SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
292*100890f7SEugenio Pérez                       svq->vdev->name, used_elem.id);
293*100890f7SEugenio Pérez         return NULL;
294*100890f7SEugenio Pérez     }
295*100890f7SEugenio Pérez 
296*100890f7SEugenio Pérez     if (unlikely(!svq->ring_id_maps[used_elem.id])) {
297*100890f7SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR,
298*100890f7SEugenio Pérez             "Device %s says index %u is used, but it was not available",
299*100890f7SEugenio Pérez             svq->vdev->name, used_elem.id);
300*100890f7SEugenio Pérez         return NULL;
301*100890f7SEugenio Pérez     }
302*100890f7SEugenio Pérez 
303*100890f7SEugenio Pérez     descs[used_elem.id].next = svq->free_head;
304*100890f7SEugenio Pérez     svq->free_head = used_elem.id;
305*100890f7SEugenio Pérez 
306*100890f7SEugenio Pérez     *len = used_elem.len;
307*100890f7SEugenio Pérez     return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
308*100890f7SEugenio Pérez }
309*100890f7SEugenio Pérez 
/**
 * Forward all used buffers from the device to the guest virtqueue.
 *
 * @svq: The shadow virtqueue
 * @check_for_avail_queue: Also try to forward guest available buffers that
 *                         were deferred because the SVQ ring was full
 *
 * Loops until notifications can be re-enabled with no used buffer pending,
 * closing the race between the last check and the enable.
 */
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned i = 0;

        /* No interrupts needed while actively polling the used ring */
        vhost_svq_disable_notification(svq);
        while (true) {
            uint32_t len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
            if (!elem) {
                break;
            }

            if (unlikely(i >= svq->vring.num)) {
                /* Device error: more used entries than the ring can hold */
                qemu_log_mask(LOG_GUEST_ERROR,
                         "More than %u used buffers obtained in a %u size SVQ",
                         i, svq->vring.num);
                virtqueue_fill(vq, elem, len, i);
                virtqueue_flush(vq, i);
                return;
            }
            virtqueue_fill(vq, elem, len, i++);
        }

        /* Publish the used entries to the guest and notify it */
        virtqueue_flush(vq, i);
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * Avail ring was full when vhost_svq_flush was called, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}
350*100890f7SEugenio Pérez 
351*100890f7SEugenio Pérez /**
352*100890f7SEugenio Pérez  * Forward used buffers.
353a8ac8858SEugenio Pérez  *
354a8ac8858SEugenio Pérez  * @n: hdev call event notifier, the one that device set to notify svq.
355*100890f7SEugenio Pérez  *
356*100890f7SEugenio Pérez  * Note that we are not making any buffers available in the loop, there is no
357*100890f7SEugenio Pérez  * way that it runs more than virtqueue size times.
358a8ac8858SEugenio Pérez  */
359a8ac8858SEugenio Pérez static void vhost_svq_handle_call(EventNotifier *n)
360a8ac8858SEugenio Pérez {
361a8ac8858SEugenio Pérez     VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
362a8ac8858SEugenio Pérez                                              hdev_call);
363a8ac8858SEugenio Pérez     event_notifier_test_and_clear(n);
364*100890f7SEugenio Pérez     vhost_svq_flush(svq, true);
365a8ac8858SEugenio Pérez }
366a8ac8858SEugenio Pérez 
367a8ac8858SEugenio Pérez /**
368a8ac8858SEugenio Pérez  * Set the call notifier for the SVQ to call the guest
369a8ac8858SEugenio Pérez  *
370a8ac8858SEugenio Pérez  * @svq: Shadow virtqueue
371a8ac8858SEugenio Pérez  * @call_fd: call notifier
372a8ac8858SEugenio Pérez  *
373a8ac8858SEugenio Pérez  * Called on BQL context.
374a8ac8858SEugenio Pérez  */
375a8ac8858SEugenio Pérez void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
376a8ac8858SEugenio Pérez {
377a8ac8858SEugenio Pérez     if (call_fd == VHOST_FILE_UNBIND) {
378a8ac8858SEugenio Pérez         /*
379a8ac8858SEugenio Pérez          * Fail event_notifier_set if called handling device call.
380a8ac8858SEugenio Pérez          *
381a8ac8858SEugenio Pérez          * SVQ still needs device notifications, since it needs to keep
382a8ac8858SEugenio Pérez          * forwarding used buffers even with the unbind.
383a8ac8858SEugenio Pérez          */
384a8ac8858SEugenio Pérez         memset(&svq->svq_call, 0, sizeof(svq->svq_call));
385a8ac8858SEugenio Pérez     } else {
386a8ac8858SEugenio Pérez         event_notifier_init_fd(&svq->svq_call, call_fd);
387a8ac8858SEugenio Pérez     }
388a8ac8858SEugenio Pérez }
389a8ac8858SEugenio Pérez 
390a8ac8858SEugenio Pérez /**
391dafb34c9SEugenio Pérez  * Get the shadow vq vring address.
392dafb34c9SEugenio Pérez  * @svq: Shadow virtqueue
393dafb34c9SEugenio Pérez  * @addr: Destination to store address
394dafb34c9SEugenio Pérez  */
395dafb34c9SEugenio Pérez void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
396dafb34c9SEugenio Pérez                               struct vhost_vring_addr *addr)
397dafb34c9SEugenio Pérez {
398dafb34c9SEugenio Pérez     addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
399dafb34c9SEugenio Pérez     addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
400dafb34c9SEugenio Pérez     addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
401dafb34c9SEugenio Pérez }
402dafb34c9SEugenio Pérez 
403dafb34c9SEugenio Pérez size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
404dafb34c9SEugenio Pérez {
405dafb34c9SEugenio Pérez     size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
406dafb34c9SEugenio Pérez     size_t avail_size = offsetof(vring_avail_t, ring) +
407dafb34c9SEugenio Pérez                                              sizeof(uint16_t) * svq->vring.num;
408dafb34c9SEugenio Pérez 
409dafb34c9SEugenio Pérez     return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
410dafb34c9SEugenio Pérez }
411dafb34c9SEugenio Pérez 
412dafb34c9SEugenio Pérez size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
413dafb34c9SEugenio Pérez {
414dafb34c9SEugenio Pérez     size_t used_size = offsetof(vring_used_t, ring) +
415dafb34c9SEugenio Pérez                                     sizeof(vring_used_elem_t) * svq->vring.num;
416dafb34c9SEugenio Pérez     return ROUND_UP(used_size, qemu_real_host_page_size);
417dafb34c9SEugenio Pérez }
418dafb34c9SEugenio Pérez 
/**
 * Set a new file descriptor for the guest to kick the SVQ and notify for avail
 *
 * @svq: The svq
 * @svq_kick_fd: The svq kick fd
 *
 * Note that the SVQ will never close the old file descriptor.
 */
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
{
    EventNotifier *svq_kick = &svq->svq_kick;
    /* Only poll-stop if a real fd was being polled before the switch */
    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

    if (poll_stop) {
        event_notifier_set_handler(svq_kick, NULL);
    }

    /*
     * event_notifier_set_handler already checks for guest's notifications if
     * they arrive at the new file descriptor in the switch, so there is no
     * need to explicitly check for them.
     */
    if (poll_start) {
        event_notifier_init_fd(svq_kick, svq_kick_fd);
        /*
         * Self-kick so buffers made available before the fd switch are
         * processed by the new handler -- NOTE(review): confirm this is the
         * intent; the set happens before the handler is installed.
         */
        event_notifier_set(svq_kick);
        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
    }
}
448*100890f7SEugenio Pérez 
449*100890f7SEugenio Pérez /**
450*100890f7SEugenio Pérez  * Start the shadow virtqueue operation.
451*100890f7SEugenio Pérez  *
452*100890f7SEugenio Pérez  * @svq: Shadow Virtqueue
453*100890f7SEugenio Pérez  * @vdev: VirtIO device
454*100890f7SEugenio Pérez  * @vq: Virtqueue to shadow
455*100890f7SEugenio Pérez  */
456*100890f7SEugenio Pérez void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
457*100890f7SEugenio Pérez                      VirtQueue *vq)
458*100890f7SEugenio Pérez {
459*100890f7SEugenio Pérez     size_t desc_size, driver_size, device_size;
460*100890f7SEugenio Pérez 
461*100890f7SEugenio Pérez     svq->next_guest_avail_elem = NULL;
462*100890f7SEugenio Pérez     svq->shadow_avail_idx = 0;
463*100890f7SEugenio Pérez     svq->shadow_used_idx = 0;
464*100890f7SEugenio Pérez     svq->last_used_idx = 0;
465*100890f7SEugenio Pérez     svq->vdev = vdev;
466*100890f7SEugenio Pérez     svq->vq = vq;
467*100890f7SEugenio Pérez 
468*100890f7SEugenio Pérez     svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
469*100890f7SEugenio Pérez     driver_size = vhost_svq_driver_area_size(svq);
470*100890f7SEugenio Pérez     device_size = vhost_svq_device_area_size(svq);
471*100890f7SEugenio Pérez     svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
472*100890f7SEugenio Pérez     desc_size = sizeof(vring_desc_t) * svq->vring.num;
473*100890f7SEugenio Pérez     svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
474*100890f7SEugenio Pérez     memset(svq->vring.desc, 0, driver_size);
475*100890f7SEugenio Pérez     svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
476*100890f7SEugenio Pérez     memset(svq->vring.used, 0, device_size);
477*100890f7SEugenio Pérez     svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
478*100890f7SEugenio Pérez     for (unsigned i = 0; i < svq->vring.num - 1; i++) {
479*100890f7SEugenio Pérez         svq->vring.desc[i].next = cpu_to_le16(i + 1);
480dff4426fSEugenio Pérez     }
481dff4426fSEugenio Pérez }
482dff4426fSEugenio Pérez 
/**
 * Stop the shadow virtqueue operation.
 * @svq: Shadow Virtqueue
 *
 * Flushes pending used buffers to the guest, returns every in-flight element
 * to the guest virtqueue, and frees the vring memory. Safe to call when the
 * SVQ was never started (svq->vq == NULL).
 */
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
    /* Stop polling the guest kick fd before tearing anything down */
    event_notifier_set_handler(&svq->svq_kick, NULL);
    g_autofree VirtQueueElement *next_avail_elem = NULL;

    if (!svq->vq) {
        return;
    }

    /* Send all pending used descriptors to guest */
    vhost_svq_flush(svq, false);

    /* Return in-flight elements so the guest can reuse their descriptors */
    for (unsigned i = 0; i < svq->vring.num; ++i) {
        g_autofree VirtQueueElement *elem = NULL;
        elem = g_steal_pointer(&svq->ring_id_maps[i]);
        if (elem) {
            virtqueue_detach_element(svq->vq, elem, 0);
        }
    }

    /* Also return the element deferred because the SVQ ring was full */
    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
    if (next_avail_elem) {
        virtqueue_detach_element(svq->vq, next_avail_elem, 0);
    }
    svq->vq = NULL;
    g_free(svq->ring_id_maps);
    qemu_vfree(svq->vring.desc);
    qemu_vfree(svq->vring.used);
}
51610857ec0SEugenio Pérez 
51710857ec0SEugenio Pérez /**
51810857ec0SEugenio Pérez  * Creates vhost shadow virtqueue, and instructs the vhost device to use the
51910857ec0SEugenio Pérez  * shadow methods and file descriptors.
52010857ec0SEugenio Pérez  *
52110857ec0SEugenio Pérez  * Returns the new virtqueue or NULL.
52210857ec0SEugenio Pérez  *
52310857ec0SEugenio Pérez  * In case of error, reason is reported through error_report.
52410857ec0SEugenio Pérez  */
52510857ec0SEugenio Pérez VhostShadowVirtqueue *vhost_svq_new(void)
52610857ec0SEugenio Pérez {
52710857ec0SEugenio Pérez     g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
52810857ec0SEugenio Pérez     int r;
52910857ec0SEugenio Pérez 
53010857ec0SEugenio Pérez     r = event_notifier_init(&svq->hdev_kick, 0);
53110857ec0SEugenio Pérez     if (r != 0) {
53210857ec0SEugenio Pérez         error_report("Couldn't create kick event notifier: %s (%d)",
53310857ec0SEugenio Pérez                      g_strerror(errno), errno);
53410857ec0SEugenio Pérez         goto err_init_hdev_kick;
53510857ec0SEugenio Pérez     }
53610857ec0SEugenio Pérez 
53710857ec0SEugenio Pérez     r = event_notifier_init(&svq->hdev_call, 0);
53810857ec0SEugenio Pérez     if (r != 0) {
53910857ec0SEugenio Pérez         error_report("Couldn't create call event notifier: %s (%d)",
54010857ec0SEugenio Pérez                      g_strerror(errno), errno);
54110857ec0SEugenio Pérez         goto err_init_hdev_call;
54210857ec0SEugenio Pérez     }
54310857ec0SEugenio Pérez 
544dff4426fSEugenio Pérez     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
545a8ac8858SEugenio Pérez     event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
54610857ec0SEugenio Pérez     return g_steal_pointer(&svq);
54710857ec0SEugenio Pérez 
54810857ec0SEugenio Pérez err_init_hdev_call:
54910857ec0SEugenio Pérez     event_notifier_cleanup(&svq->hdev_kick);
55010857ec0SEugenio Pérez 
55110857ec0SEugenio Pérez err_init_hdev_kick:
55210857ec0SEugenio Pérez     return NULL;
55310857ec0SEugenio Pérez }
55410857ec0SEugenio Pérez 
55510857ec0SEugenio Pérez /**
55610857ec0SEugenio Pérez  * Free the resources of the shadow virtqueue.
55710857ec0SEugenio Pérez  *
55810857ec0SEugenio Pérez  * @pvq: gpointer to SVQ so it can be used by autofree functions.
55910857ec0SEugenio Pérez  */
56010857ec0SEugenio Pérez void vhost_svq_free(gpointer pvq)
56110857ec0SEugenio Pérez {
56210857ec0SEugenio Pérez     VhostShadowVirtqueue *vq = pvq;
563dff4426fSEugenio Pérez     vhost_svq_stop(vq);
56410857ec0SEugenio Pérez     event_notifier_cleanup(&vq->hdev_kick);
565a8ac8858SEugenio Pérez     event_notifier_set_handler(&vq->hdev_call, NULL);
56610857ec0SEugenio Pérez     event_notifier_cleanup(&vq->hdev_call);
56710857ec0SEugenio Pérez     g_free(vq);
56810857ec0SEugenio Pérez }
569