110857ec0SEugenio Pérez /* 210857ec0SEugenio Pérez * vhost shadow virtqueue 310857ec0SEugenio Pérez * 410857ec0SEugenio Pérez * SPDX-FileCopyrightText: Red Hat, Inc. 2021 510857ec0SEugenio Pérez * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> 610857ec0SEugenio Pérez * 710857ec0SEugenio Pérez * SPDX-License-Identifier: GPL-2.0-or-later 810857ec0SEugenio Pérez */ 910857ec0SEugenio Pérez 1010857ec0SEugenio Pérez #include "qemu/osdep.h" 1110857ec0SEugenio Pérez #include "hw/virtio/vhost-shadow-virtqueue.h" 1210857ec0SEugenio Pérez 1310857ec0SEugenio Pérez #include "qemu/error-report.h" 144725a418SEugenio Pérez #include "qapi/error.h" 15dff4426fSEugenio Pérez #include "qemu/main-loop.h" 16*100890f7SEugenio Pérez #include "qemu/log.h" 17*100890f7SEugenio Pérez #include "qemu/memalign.h" 18dff4426fSEugenio Pérez #include "linux-headers/linux/vhost.h" 19dff4426fSEugenio Pérez 20dff4426fSEugenio Pérez /** 214725a418SEugenio Pérez * Validate the transport device features that both guests can use with the SVQ 224725a418SEugenio Pérez * and SVQs can use with the device. 234725a418SEugenio Pérez * 244725a418SEugenio Pérez * @dev_features: The features 254725a418SEugenio Pérez * @errp: Error pointer 264725a418SEugenio Pérez */ 274725a418SEugenio Pérez bool vhost_svq_valid_features(uint64_t features, Error **errp) 284725a418SEugenio Pérez { 294725a418SEugenio Pérez bool ok = true; 304725a418SEugenio Pérez uint64_t svq_features = features; 314725a418SEugenio Pérez 324725a418SEugenio Pérez for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END; 334725a418SEugenio Pérez ++b) { 344725a418SEugenio Pérez switch (b) { 354725a418SEugenio Pérez case VIRTIO_F_ANY_LAYOUT: 364725a418SEugenio Pérez continue; 374725a418SEugenio Pérez 384725a418SEugenio Pérez case VIRTIO_F_ACCESS_PLATFORM: 394725a418SEugenio Pérez /* SVQ trust in the host's IOMMU to translate addresses */ 404725a418SEugenio Pérez case VIRTIO_F_VERSION_1: 414725a418SEugenio Pérez /* SVQ trust that the guest vring is little endian */ 424725a418SEugenio Pérez if (!(svq_features & BIT_ULL(b))) { 434725a418SEugenio Pérez svq_features |= BIT_ULL(b); 444725a418SEugenio Pérez ok = false; 454725a418SEugenio Pérez } 464725a418SEugenio Pérez continue; 474725a418SEugenio Pérez 484725a418SEugenio Pérez default: 494725a418SEugenio Pérez if (svq_features & BIT_ULL(b)) { 504725a418SEugenio Pérez svq_features &= ~BIT_ULL(b); 514725a418SEugenio Pérez ok = false; 524725a418SEugenio Pérez } 534725a418SEugenio Pérez } 544725a418SEugenio Pérez } 554725a418SEugenio Pérez 564725a418SEugenio Pérez if (!ok) { 574725a418SEugenio Pérez error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64 584725a418SEugenio Pérez ", ok: 0x%"PRIx64, features, svq_features); 594725a418SEugenio Pérez } 604725a418SEugenio Pérez return ok; 614725a418SEugenio Pérez } 624725a418SEugenio Pérez 634725a418SEugenio Pérez /** 64*100890f7SEugenio Pérez * Number of descriptors that the SVQ can make available from the guest. 65dff4426fSEugenio Pérez * 66*100890f7SEugenio Pérez * @svq: The svq 67dff4426fSEugenio Pérez */ 68*100890f7SEugenio Pérez static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) 69dff4426fSEugenio Pérez { 70*100890f7SEugenio Pérez return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); 71*100890f7SEugenio Pérez } 72*100890f7SEugenio Pérez 73*100890f7SEugenio Pérez static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, 74*100890f7SEugenio Pérez const struct iovec *iovec, size_t num, 75*100890f7SEugenio Pérez bool more_descs, bool write) 76*100890f7SEugenio Pérez { 77*100890f7SEugenio Pérez uint16_t i = svq->free_head, last = svq->free_head; 78*100890f7SEugenio Pérez unsigned n; 79*100890f7SEugenio Pérez uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; 80*100890f7SEugenio Pérez vring_desc_t *descs = svq->vring.desc; 81*100890f7SEugenio Pérez 82*100890f7SEugenio Pérez if (num == 0) { 83*100890f7SEugenio Pérez return; 84*100890f7SEugenio Pérez } 85*100890f7SEugenio Pérez 86*100890f7SEugenio Pérez for (n = 0; n < num; n++) { 87*100890f7SEugenio Pérez if (more_descs || (n + 1 < num)) { 88*100890f7SEugenio Pérez descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); 89*100890f7SEugenio Pérez } else { 90*100890f7SEugenio Pérez descs[i].flags = flags; 91*100890f7SEugenio Pérez } 92*100890f7SEugenio Pérez descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base); 93*100890f7SEugenio Pérez descs[i].len = cpu_to_le32(iovec[n].iov_len); 94*100890f7SEugenio Pérez 95*100890f7SEugenio Pérez last = i; 96*100890f7SEugenio Pérez i = cpu_to_le16(descs[i].next); 97*100890f7SEugenio Pérez } 98*100890f7SEugenio Pérez 99*100890f7SEugenio Pérez svq->free_head = le16_to_cpu(descs[last].next); 100*100890f7SEugenio Pérez } 101*100890f7SEugenio Pérez 102*100890f7SEugenio Pérez static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, 103*100890f7SEugenio Pérez VirtQueueElement *elem, unsigned *head) 104*100890f7SEugenio Pérez { 105*100890f7SEugenio Pérez unsigned avail_idx; 106*100890f7SEugenio Pérez vring_avail_t *avail = svq->vring.avail; 107*100890f7SEugenio Pérez 108*100890f7SEugenio Pérez *head = svq->free_head; 109*100890f7SEugenio Pérez 110*100890f7SEugenio Pérez /* We need some descriptors here */ 111*100890f7SEugenio Pérez if (unlikely(!elem->out_num && !elem->in_num)) { 112*100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 113*100890f7SEugenio Pérez "Guest provided element with no descriptors"); 114*100890f7SEugenio Pérez return false; 115*100890f7SEugenio Pérez } 116*100890f7SEugenio Pérez 117*100890f7SEugenio Pérez vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0, 118*100890f7SEugenio Pérez false); 119*100890f7SEugenio Pérez vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); 120*100890f7SEugenio Pérez 121*100890f7SEugenio Pérez /* 122*100890f7SEugenio Pérez * Put the entry in the available array (but don't update avail->idx until 123*100890f7SEugenio Pérez * they do sync). 124*100890f7SEugenio Pérez */ 125*100890f7SEugenio Pérez avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1); 126*100890f7SEugenio Pérez avail->ring[avail_idx] = cpu_to_le16(*head); 127*100890f7SEugenio Pérez svq->shadow_avail_idx++; 128*100890f7SEugenio Pérez 129*100890f7SEugenio Pérez /* Update the avail index after write the descriptor */ 130*100890f7SEugenio Pérez smp_wmb(); 131*100890f7SEugenio Pérez avail->idx = cpu_to_le16(svq->shadow_avail_idx); 132*100890f7SEugenio Pérez 133*100890f7SEugenio Pérez return true; 134*100890f7SEugenio Pérez } 135*100890f7SEugenio Pérez 136*100890f7SEugenio Pérez static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) 137*100890f7SEugenio Pérez { 138*100890f7SEugenio Pérez unsigned qemu_head; 139*100890f7SEugenio Pérez bool ok = vhost_svq_add_split(svq, elem, &qemu_head); 140*100890f7SEugenio Pérez if (unlikely(!ok)) { 141*100890f7SEugenio Pérez return false; 142*100890f7SEugenio Pérez } 143*100890f7SEugenio Pérez 144*100890f7SEugenio Pérez svq->ring_id_maps[qemu_head] = elem; 145*100890f7SEugenio Pérez return true; 146*100890f7SEugenio Pérez } 147*100890f7SEugenio Pérez 148*100890f7SEugenio Pérez static void vhost_svq_kick(VhostShadowVirtqueue *svq) 149*100890f7SEugenio Pérez { 150*100890f7SEugenio Pérez /* 151*100890f7SEugenio Pérez * We need to expose the available array entries before checking the used 152*100890f7SEugenio Pérez * flags 153*100890f7SEugenio Pérez */ 154*100890f7SEugenio Pérez smp_mb(); 155*100890f7SEugenio Pérez if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { 156*100890f7SEugenio Pérez return; 157*100890f7SEugenio Pérez } 158*100890f7SEugenio Pérez 159dff4426fSEugenio Pérez event_notifier_set(&svq->hdev_kick); 160dff4426fSEugenio Pérez } 161dff4426fSEugenio Pérez 162dff4426fSEugenio Pérez /** 163*100890f7SEugenio Pérez * Forward available buffers. 164*100890f7SEugenio Pérez * 165*100890f7SEugenio Pérez * @svq: Shadow VirtQueue 166*100890f7SEugenio Pérez * 167*100890f7SEugenio Pérez * Note that this function does not guarantee that all guest's available 168*100890f7SEugenio Pérez * buffers are available to the device in SVQ avail ring. The guest may have 169*100890f7SEugenio Pérez * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in 170*100890f7SEugenio Pérez * qemu vaddr. 171*100890f7SEugenio Pérez * 172*100890f7SEugenio Pérez * If that happens, guest's kick notifications will be disabled until the 173*100890f7SEugenio Pérez * device uses some buffers. 174*100890f7SEugenio Pérez */ 175*100890f7SEugenio Pérez static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) 176*100890f7SEugenio Pérez { 177*100890f7SEugenio Pérez /* Clear event notifier */ 178*100890f7SEugenio Pérez event_notifier_test_and_clear(&svq->svq_kick); 179*100890f7SEugenio Pérez 180*100890f7SEugenio Pérez /* Forward to the device as many available buffers as possible */ 181*100890f7SEugenio Pérez do { 182*100890f7SEugenio Pérez virtio_queue_set_notification(svq->vq, false); 183*100890f7SEugenio Pérez 184*100890f7SEugenio Pérez while (true) { 185*100890f7SEugenio Pérez VirtQueueElement *elem; 186*100890f7SEugenio Pérez bool ok; 187*100890f7SEugenio Pérez 188*100890f7SEugenio Pérez if (svq->next_guest_avail_elem) { 189*100890f7SEugenio Pérez elem = g_steal_pointer(&svq->next_guest_avail_elem); 190*100890f7SEugenio Pérez } else { 191*100890f7SEugenio Pérez elem = virtqueue_pop(svq->vq, sizeof(*elem)); 192*100890f7SEugenio Pérez } 193*100890f7SEugenio Pérez 194*100890f7SEugenio Pérez if (!elem) { 195*100890f7SEugenio Pérez break; 196*100890f7SEugenio Pérez } 197*100890f7SEugenio Pérez 198*100890f7SEugenio Pérez if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { 199*100890f7SEugenio Pérez /* 200*100890f7SEugenio Pérez * This condition is possible since a contiguous buffer in GPA 201*100890f7SEugenio Pérez * does not imply a contiguous buffer in qemu's VA 202*100890f7SEugenio Pérez * scatter-gather segments. If that happens, the buffer exposed 203*100890f7SEugenio Pérez * to the device needs to be a chain of descriptors at this 204*100890f7SEugenio Pérez * moment. 205*100890f7SEugenio Pérez * 206*100890f7SEugenio Pérez * SVQ cannot hold more available buffers if we are here: 207*100890f7SEugenio Pérez * queue the current guest descriptor and ignore further kicks 208*100890f7SEugenio Pérez * until some elements are used. 209*100890f7SEugenio Pérez */ 210*100890f7SEugenio Pérez svq->next_guest_avail_elem = elem; 211*100890f7SEugenio Pérez return; 212*100890f7SEugenio Pérez } 213*100890f7SEugenio Pérez 214*100890f7SEugenio Pérez ok = vhost_svq_add(svq, elem); 215*100890f7SEugenio Pérez if (unlikely(!ok)) { 216*100890f7SEugenio Pérez /* VQ is broken, just return and ignore any other kicks */ 217*100890f7SEugenio Pérez return; 218*100890f7SEugenio Pérez } 219*100890f7SEugenio Pérez vhost_svq_kick(svq); 220*100890f7SEugenio Pérez } 221*100890f7SEugenio Pérez 222*100890f7SEugenio Pérez virtio_queue_set_notification(svq->vq, true); 223*100890f7SEugenio Pérez } while (!virtio_queue_empty(svq->vq)); 224*100890f7SEugenio Pérez } 225*100890f7SEugenio Pérez 226*100890f7SEugenio Pérez /** 227*100890f7SEugenio Pérez * Handle guest's kick. 228*100890f7SEugenio Pérez * 229*100890f7SEugenio Pérez * @n: guest kick event notifier, the one that guest set to notify svq. 230*100890f7SEugenio Pérez */ 231*100890f7SEugenio Pérez static void vhost_handle_guest_kick_notifier(EventNotifier *n) 232*100890f7SEugenio Pérez { 233*100890f7SEugenio Pérez VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick); 234*100890f7SEugenio Pérez event_notifier_test_and_clear(n); 235*100890f7SEugenio Pérez vhost_handle_guest_kick(svq); 236*100890f7SEugenio Pérez } 237*100890f7SEugenio Pérez 238*100890f7SEugenio Pérez static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) 239*100890f7SEugenio Pérez { 240*100890f7SEugenio Pérez if (svq->last_used_idx != svq->shadow_used_idx) { 241*100890f7SEugenio Pérez return true; 242*100890f7SEugenio Pérez } 243*100890f7SEugenio Pérez 244*100890f7SEugenio Pérez svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); 245*100890f7SEugenio Pérez 246*100890f7SEugenio Pérez return svq->last_used_idx != svq->shadow_used_idx; 247*100890f7SEugenio Pérez } 248*100890f7SEugenio Pérez 249*100890f7SEugenio Pérez /** 250*100890f7SEugenio Pérez * Enable vhost device calls after disable them. 251*100890f7SEugenio Pérez * 252*100890f7SEugenio Pérez * @svq: The svq 253*100890f7SEugenio Pérez * 254*100890f7SEugenio Pérez * It returns false if there are pending used buffers from the vhost device, 255*100890f7SEugenio Pérez * avoiding the possible races between SVQ checking for more work and enabling 256*100890f7SEugenio Pérez * callbacks. True if SVQ used vring has no more pending buffers. 257*100890f7SEugenio Pérez */ 258*100890f7SEugenio Pérez static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) 259*100890f7SEugenio Pérez { 260*100890f7SEugenio Pérez svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); 261*100890f7SEugenio Pérez /* Make sure the flag is written before the read of used_idx */ 262*100890f7SEugenio Pérez smp_mb(); 263*100890f7SEugenio Pérez return !vhost_svq_more_used(svq); 264*100890f7SEugenio Pérez } 265*100890f7SEugenio Pérez 266*100890f7SEugenio Pérez static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) 267*100890f7SEugenio Pérez { 268*100890f7SEugenio Pérez svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); 269*100890f7SEugenio Pérez } 270*100890f7SEugenio Pérez 271*100890f7SEugenio Pérez static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, 272*100890f7SEugenio Pérez uint32_t *len) 273*100890f7SEugenio Pérez { 274*100890f7SEugenio Pérez vring_desc_t *descs = svq->vring.desc; 275*100890f7SEugenio Pérez const vring_used_t *used = svq->vring.used; 276*100890f7SEugenio Pérez vring_used_elem_t used_elem; 277*100890f7SEugenio Pérez uint16_t last_used; 278*100890f7SEugenio Pérez 279*100890f7SEugenio Pérez if (!vhost_svq_more_used(svq)) { 280*100890f7SEugenio Pérez return NULL; 281*100890f7SEugenio Pérez } 282*100890f7SEugenio Pérez 283*100890f7SEugenio Pérez /* Only get used array entries after they have been exposed by dev */ 284*100890f7SEugenio Pérez smp_rmb(); 285*100890f7SEugenio Pérez last_used = svq->last_used_idx & (svq->vring.num - 1); 286*100890f7SEugenio Pérez used_elem.id = le32_to_cpu(used->ring[last_used].id); 287*100890f7SEugenio Pérez used_elem.len = le32_to_cpu(used->ring[last_used].len); 288*100890f7SEugenio Pérez 289*100890f7SEugenio Pérez svq->last_used_idx++; 290*100890f7SEugenio Pérez if (unlikely(used_elem.id >= svq->vring.num)) { 291*100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used", 292*100890f7SEugenio Pérez svq->vdev->name, used_elem.id); 293*100890f7SEugenio Pérez return NULL; 294*100890f7SEugenio Pérez } 295*100890f7SEugenio Pérez 296*100890f7SEugenio Pérez if (unlikely(!svq->ring_id_maps[used_elem.id])) { 297*100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 298*100890f7SEugenio Pérez "Device %s says index %u is used, but it was not available", 299*100890f7SEugenio Pérez svq->vdev->name, used_elem.id); 300*100890f7SEugenio Pérez return NULL; 301*100890f7SEugenio Pérez } 302*100890f7SEugenio Pérez 303*100890f7SEugenio Pérez descs[used_elem.id].next = svq->free_head; 304*100890f7SEugenio Pérez svq->free_head = used_elem.id; 305*100890f7SEugenio Pérez 306*100890f7SEugenio Pérez *len = used_elem.len; 307*100890f7SEugenio Pérez return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); 308*100890f7SEugenio Pérez } 309*100890f7SEugenio Pérez 310*100890f7SEugenio Pérez static void vhost_svq_flush(VhostShadowVirtqueue *svq, 311*100890f7SEugenio Pérez bool check_for_avail_queue) 312*100890f7SEugenio Pérez { 313*100890f7SEugenio Pérez VirtQueue *vq = svq->vq; 314*100890f7SEugenio Pérez 315*100890f7SEugenio Pérez /* Forward as many used buffers as possible. */ 316*100890f7SEugenio Pérez do { 317*100890f7SEugenio Pérez unsigned i = 0; 318*100890f7SEugenio Pérez 319*100890f7SEugenio Pérez vhost_svq_disable_notification(svq); 320*100890f7SEugenio Pérez while (true) { 321*100890f7SEugenio Pérez uint32_t len; 322*100890f7SEugenio Pérez g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); 323*100890f7SEugenio Pérez if (!elem) { 324*100890f7SEugenio Pérez break; 325*100890f7SEugenio Pérez } 326*100890f7SEugenio Pérez 327*100890f7SEugenio Pérez if (unlikely(i >= svq->vring.num)) { 328*100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 329*100890f7SEugenio Pérez "More than %u used buffers obtained in a %u size SVQ", 330*100890f7SEugenio Pérez i, svq->vring.num); 331*100890f7SEugenio Pérez virtqueue_fill(vq, elem, len, i); 332*100890f7SEugenio Pérez virtqueue_flush(vq, i); 333*100890f7SEugenio Pérez return; 334*100890f7SEugenio Pérez } 335*100890f7SEugenio Pérez virtqueue_fill(vq, elem, len, i++); 336*100890f7SEugenio Pérez } 337*100890f7SEugenio Pérez 338*100890f7SEugenio Pérez virtqueue_flush(vq, i); 339*100890f7SEugenio Pérez event_notifier_set(&svq->svq_call); 340*100890f7SEugenio Pérez 341*100890f7SEugenio Pérez if (check_for_avail_queue && svq->next_guest_avail_elem) { 342*100890f7SEugenio Pérez /* 343*100890f7SEugenio Pérez * Avail ring was full when vhost_svq_flush was called, so it's a 344*100890f7SEugenio Pérez * good moment to make more descriptors available if possible. 345*100890f7SEugenio Pérez */ 346*100890f7SEugenio Pérez vhost_handle_guest_kick(svq); 347*100890f7SEugenio Pérez } 348*100890f7SEugenio Pérez } while (!vhost_svq_enable_notification(svq)); 349*100890f7SEugenio Pérez } 350*100890f7SEugenio Pérez 351*100890f7SEugenio Pérez /** 352*100890f7SEugenio Pérez * Forward used buffers. 353a8ac8858SEugenio Pérez * 354a8ac8858SEugenio Pérez * @n: hdev call event notifier, the one that device set to notify svq. 355*100890f7SEugenio Pérez * 356*100890f7SEugenio Pérez * Note that we are not making any buffers available in the loop, there is no 357*100890f7SEugenio Pérez * way that it runs more than virtqueue size times. 358a8ac8858SEugenio Pérez */ 359a8ac8858SEugenio Pérez static void vhost_svq_handle_call(EventNotifier *n) 360a8ac8858SEugenio Pérez { 361a8ac8858SEugenio Pérez VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, 362a8ac8858SEugenio Pérez hdev_call); 363a8ac8858SEugenio Pérez event_notifier_test_and_clear(n); 364*100890f7SEugenio Pérez vhost_svq_flush(svq, true); 365a8ac8858SEugenio Pérez } 366a8ac8858SEugenio Pérez 367a8ac8858SEugenio Pérez /** 368a8ac8858SEugenio Pérez * Set the call notifier for the SVQ to call the guest 369a8ac8858SEugenio Pérez * 370a8ac8858SEugenio Pérez * @svq: Shadow virtqueue 371a8ac8858SEugenio Pérez * @call_fd: call notifier 372a8ac8858SEugenio Pérez * 373a8ac8858SEugenio Pérez * Called on BQL context. 374a8ac8858SEugenio Pérez */ 375a8ac8858SEugenio Pérez void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) 376a8ac8858SEugenio Pérez { 377a8ac8858SEugenio Pérez if (call_fd == VHOST_FILE_UNBIND) { 378a8ac8858SEugenio Pérez /* 379a8ac8858SEugenio Pérez * Fail event_notifier_set if called handling device call. 380a8ac8858SEugenio Pérez * 381a8ac8858SEugenio Pérez * SVQ still needs device notifications, since it needs to keep 382a8ac8858SEugenio Pérez * forwarding used buffers even with the unbind. 383a8ac8858SEugenio Pérez */ 384a8ac8858SEugenio Pérez memset(&svq->svq_call, 0, sizeof(svq->svq_call)); 385a8ac8858SEugenio Pérez } else { 386a8ac8858SEugenio Pérez event_notifier_init_fd(&svq->svq_call, call_fd); 387a8ac8858SEugenio Pérez } 388a8ac8858SEugenio Pérez } 389a8ac8858SEugenio Pérez 390a8ac8858SEugenio Pérez /** 391dafb34c9SEugenio Pérez * Get the shadow vq vring address. 392dafb34c9SEugenio Pérez * @svq: Shadow virtqueue 393dafb34c9SEugenio Pérez * @addr: Destination to store address 394dafb34c9SEugenio Pérez */ 395dafb34c9SEugenio Pérez void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, 396dafb34c9SEugenio Pérez struct vhost_vring_addr *addr) 397dafb34c9SEugenio Pérez { 398dafb34c9SEugenio Pérez addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc; 399dafb34c9SEugenio Pérez addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail; 400dafb34c9SEugenio Pérez addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used; 401dafb34c9SEugenio Pérez } 402dafb34c9SEugenio Pérez 403dafb34c9SEugenio Pérez size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) 404dafb34c9SEugenio Pérez { 405dafb34c9SEugenio Pérez size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; 406dafb34c9SEugenio Pérez size_t avail_size = offsetof(vring_avail_t, ring) + 407dafb34c9SEugenio Pérez sizeof(uint16_t) * svq->vring.num; 408dafb34c9SEugenio Pérez 409dafb34c9SEugenio Pérez return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size); 410dafb34c9SEugenio Pérez } 411dafb34c9SEugenio Pérez 412dafb34c9SEugenio Pérez size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) 413dafb34c9SEugenio Pérez { 414dafb34c9SEugenio Pérez size_t used_size = offsetof(vring_used_t, ring) + 415dafb34c9SEugenio Pérez sizeof(vring_used_elem_t) * svq->vring.num; 416dafb34c9SEugenio Pérez return ROUND_UP(used_size, qemu_real_host_page_size); 417dafb34c9SEugenio Pérez } 418dafb34c9SEugenio Pérez 419dafb34c9SEugenio Pérez /** 420dff4426fSEugenio Pérez * Set a new file descriptor for the guest to kick the SVQ and notify for avail 421dff4426fSEugenio Pérez * 422dff4426fSEugenio Pérez * @svq: The svq 423dff4426fSEugenio Pérez * @svq_kick_fd: The svq kick fd 424dff4426fSEugenio Pérez * 425dff4426fSEugenio Pérez * Note that the SVQ will never close the old file descriptor. 426dff4426fSEugenio Pérez */ 427dff4426fSEugenio Pérez void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) 428dff4426fSEugenio Pérez { 429dff4426fSEugenio Pérez EventNotifier *svq_kick = &svq->svq_kick; 430dff4426fSEugenio Pérez bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick); 431dff4426fSEugenio Pérez bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND; 432dff4426fSEugenio Pérez 433dff4426fSEugenio Pérez if (poll_stop) { 434dff4426fSEugenio Pérez event_notifier_set_handler(svq_kick, NULL); 435dff4426fSEugenio Pérez } 436dff4426fSEugenio Pérez 437dff4426fSEugenio Pérez /* 438dff4426fSEugenio Pérez * event_notifier_set_handler already checks for guest's notifications if 439dff4426fSEugenio Pérez * they arrive at the new file descriptor in the switch, so there is no 440dff4426fSEugenio Pérez * need to explicitly check for them. 441dff4426fSEugenio Pérez */ 442dff4426fSEugenio Pérez if (poll_start) { 443dff4426fSEugenio Pérez event_notifier_init_fd(svq_kick, svq_kick_fd); 444dff4426fSEugenio Pérez event_notifier_set(svq_kick); 445*100890f7SEugenio Pérez event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); 446*100890f7SEugenio Pérez } 447*100890f7SEugenio Pérez } 448*100890f7SEugenio Pérez 449*100890f7SEugenio Pérez /** 450*100890f7SEugenio Pérez * Start the shadow virtqueue operation. 451*100890f7SEugenio Pérez * 452*100890f7SEugenio Pérez * @svq: Shadow Virtqueue 453*100890f7SEugenio Pérez * @vdev: VirtIO device 454*100890f7SEugenio Pérez * @vq: Virtqueue to shadow 455*100890f7SEugenio Pérez */ 456*100890f7SEugenio Pérez void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, 457*100890f7SEugenio Pérez VirtQueue *vq) 458*100890f7SEugenio Pérez { 459*100890f7SEugenio Pérez size_t desc_size, driver_size, device_size; 460*100890f7SEugenio Pérez 461*100890f7SEugenio Pérez svq->next_guest_avail_elem = NULL; 462*100890f7SEugenio Pérez svq->shadow_avail_idx = 0; 463*100890f7SEugenio Pérez svq->shadow_used_idx = 0; 464*100890f7SEugenio Pérez svq->last_used_idx = 0; 465*100890f7SEugenio Pérez svq->vdev = vdev; 466*100890f7SEugenio Pérez svq->vq = vq; 467*100890f7SEugenio Pérez 468*100890f7SEugenio Pérez svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); 469*100890f7SEugenio Pérez driver_size = vhost_svq_driver_area_size(svq); 470*100890f7SEugenio Pérez device_size = vhost_svq_device_area_size(svq); 471*100890f7SEugenio Pérez svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size); 472*100890f7SEugenio Pérez desc_size = sizeof(vring_desc_t) * svq->vring.num; 473*100890f7SEugenio Pérez svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); 474*100890f7SEugenio Pérez memset(svq->vring.desc, 0, driver_size); 475*100890f7SEugenio Pérez svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); 476*100890f7SEugenio Pérez memset(svq->vring.used, 0, device_size); 477*100890f7SEugenio Pérez svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 478*100890f7SEugenio Pérez for (unsigned i = 0; i < svq->vring.num - 1; i++) { 479*100890f7SEugenio Pérez svq->vring.desc[i].next = cpu_to_le16(i + 1); 480dff4426fSEugenio Pérez } 481dff4426fSEugenio Pérez } 482dff4426fSEugenio Pérez 483dff4426fSEugenio Pérez /** 484dff4426fSEugenio Pérez * Stop the shadow virtqueue operation. 485dff4426fSEugenio Pérez * @svq: Shadow Virtqueue 486dff4426fSEugenio Pérez */ 487dff4426fSEugenio Pérez void vhost_svq_stop(VhostShadowVirtqueue *svq) 488dff4426fSEugenio Pérez { 489dff4426fSEugenio Pérez event_notifier_set_handler(&svq->svq_kick, NULL); 490*100890f7SEugenio Pérez g_autofree VirtQueueElement *next_avail_elem = NULL; 491*100890f7SEugenio Pérez 492*100890f7SEugenio Pérez if (!svq->vq) { 493*100890f7SEugenio Pérez return; 494*100890f7SEugenio Pérez } 495*100890f7SEugenio Pérez 496*100890f7SEugenio Pérez /* Send all pending used descriptors to guest */ 497*100890f7SEugenio Pérez vhost_svq_flush(svq, false); 498*100890f7SEugenio Pérez 499*100890f7SEugenio Pérez for (unsigned i = 0; i < svq->vring.num; ++i) { 500*100890f7SEugenio Pérez g_autofree VirtQueueElement *elem = NULL; 501*100890f7SEugenio Pérez elem = g_steal_pointer(&svq->ring_id_maps[i]); 502*100890f7SEugenio Pérez if (elem) { 503*100890f7SEugenio Pérez virtqueue_detach_element(svq->vq, elem, 0); 504*100890f7SEugenio Pérez } 505*100890f7SEugenio Pérez } 506*100890f7SEugenio Pérez 507*100890f7SEugenio Pérez next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem); 508*100890f7SEugenio Pérez if (next_avail_elem) { 509*100890f7SEugenio Pérez virtqueue_detach_element(svq->vq, next_avail_elem, 0); 510*100890f7SEugenio Pérez } 511*100890f7SEugenio Pérez svq->vq = NULL; 512*100890f7SEugenio Pérez g_free(svq->ring_id_maps); 513*100890f7SEugenio Pérez qemu_vfree(svq->vring.desc); 514*100890f7SEugenio Pérez qemu_vfree(svq->vring.used); 515dff4426fSEugenio Pérez } 51610857ec0SEugenio Pérez 51710857ec0SEugenio Pérez /** 51810857ec0SEugenio Pérez * Creates vhost shadow virtqueue, and instructs the vhost device to use the 51910857ec0SEugenio Pérez * shadow methods and file descriptors. 52010857ec0SEugenio Pérez * 52110857ec0SEugenio Pérez * Returns the new virtqueue or NULL. 52210857ec0SEugenio Pérez * 52310857ec0SEugenio Pérez * In case of error, reason is reported through error_report. 52410857ec0SEugenio Pérez */ 52510857ec0SEugenio Pérez VhostShadowVirtqueue *vhost_svq_new(void) 52610857ec0SEugenio Pérez { 52710857ec0SEugenio Pérez g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); 52810857ec0SEugenio Pérez int r; 52910857ec0SEugenio Pérez 53010857ec0SEugenio Pérez r = event_notifier_init(&svq->hdev_kick, 0); 53110857ec0SEugenio Pérez if (r != 0) { 53210857ec0SEugenio Pérez error_report("Couldn't create kick event notifier: %s (%d)", 53310857ec0SEugenio Pérez g_strerror(errno), errno); 53410857ec0SEugenio Pérez goto err_init_hdev_kick; 53510857ec0SEugenio Pérez } 53610857ec0SEugenio Pérez 53710857ec0SEugenio Pérez r = event_notifier_init(&svq->hdev_call, 0); 53810857ec0SEugenio Pérez if (r != 0) { 53910857ec0SEugenio Pérez error_report("Couldn't create call event notifier: %s (%d)", 54010857ec0SEugenio Pérez g_strerror(errno), errno); 54110857ec0SEugenio Pérez goto err_init_hdev_call; 54210857ec0SEugenio Pérez } 54310857ec0SEugenio Pérez 544dff4426fSEugenio Pérez event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); 545a8ac8858SEugenio Pérez event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); 54610857ec0SEugenio Pérez return g_steal_pointer(&svq); 54710857ec0SEugenio Pérez 54810857ec0SEugenio Pérez err_init_hdev_call: 54910857ec0SEugenio Pérez event_notifier_cleanup(&svq->hdev_kick); 55010857ec0SEugenio Pérez 55110857ec0SEugenio Pérez err_init_hdev_kick: 55210857ec0SEugenio Pérez return NULL; 55310857ec0SEugenio Pérez } 55410857ec0SEugenio Pérez 55510857ec0SEugenio Pérez /** 55610857ec0SEugenio Pérez * Free the resources of the shadow virtqueue. 55710857ec0SEugenio Pérez * 55810857ec0SEugenio Pérez * @pvq: gpointer to SVQ so it can be used by autofree functions. 55910857ec0SEugenio Pérez */ 56010857ec0SEugenio Pérez void vhost_svq_free(gpointer pvq) 56110857ec0SEugenio Pérez { 56210857ec0SEugenio Pérez VhostShadowVirtqueue *vq = pvq; 563dff4426fSEugenio Pérez vhost_svq_stop(vq); 56410857ec0SEugenio Pérez event_notifier_cleanup(&vq->hdev_kick); 565a8ac8858SEugenio Pérez event_notifier_set_handler(&vq->hdev_call, NULL); 56610857ec0SEugenio Pérez event_notifier_cleanup(&vq->hdev_call); 56710857ec0SEugenio Pérez g_free(vq); 56810857ec0SEugenio Pérez } 569