110857ec0SEugenio Pérez /* 210857ec0SEugenio Pérez * vhost shadow virtqueue 310857ec0SEugenio Pérez * 410857ec0SEugenio Pérez * SPDX-FileCopyrightText: Red Hat, Inc. 2021 510857ec0SEugenio Pérez * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com> 610857ec0SEugenio Pérez * 710857ec0SEugenio Pérez * SPDX-License-Identifier: GPL-2.0-or-later 810857ec0SEugenio Pérez */ 910857ec0SEugenio Pérez 1010857ec0SEugenio Pérez #include "qemu/osdep.h" 1110857ec0SEugenio Pérez #include "hw/virtio/vhost-shadow-virtqueue.h" 1210857ec0SEugenio Pérez 1310857ec0SEugenio Pérez #include "qemu/error-report.h" 144725a418SEugenio Pérez #include "qapi/error.h" 15dff4426fSEugenio Pérez #include "qemu/main-loop.h" 16100890f7SEugenio Pérez #include "qemu/log.h" 17100890f7SEugenio Pérez #include "qemu/memalign.h" 18dff4426fSEugenio Pérez #include "linux-headers/linux/vhost.h" 19dff4426fSEugenio Pérez 20dff4426fSEugenio Pérez /** 214725a418SEugenio Pérez * Validate the transport device features that both guests can use with the SVQ 224725a418SEugenio Pérez * and SVQs can use with the device. 
234725a418SEugenio Pérez * 244725a418SEugenio Pérez * @dev_features: The features 254725a418SEugenio Pérez * @errp: Error pointer 264725a418SEugenio Pérez */ 274725a418SEugenio Pérez bool vhost_svq_valid_features(uint64_t features, Error **errp) 284725a418SEugenio Pérez { 294725a418SEugenio Pérez bool ok = true; 304725a418SEugenio Pérez uint64_t svq_features = features; 314725a418SEugenio Pérez 324725a418SEugenio Pérez for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END; 334725a418SEugenio Pérez ++b) { 344725a418SEugenio Pérez switch (b) { 354725a418SEugenio Pérez case VIRTIO_F_ANY_LAYOUT: 364725a418SEugenio Pérez continue; 374725a418SEugenio Pérez 384725a418SEugenio Pérez case VIRTIO_F_ACCESS_PLATFORM: 394725a418SEugenio Pérez /* SVQ trust in the host's IOMMU to translate addresses */ 404725a418SEugenio Pérez case VIRTIO_F_VERSION_1: 414725a418SEugenio Pérez /* SVQ trust that the guest vring is little endian */ 424725a418SEugenio Pérez if (!(svq_features & BIT_ULL(b))) { 434725a418SEugenio Pérez svq_features |= BIT_ULL(b); 444725a418SEugenio Pérez ok = false; 454725a418SEugenio Pérez } 464725a418SEugenio Pérez continue; 474725a418SEugenio Pérez 484725a418SEugenio Pérez default: 494725a418SEugenio Pérez if (svq_features & BIT_ULL(b)) { 504725a418SEugenio Pérez svq_features &= ~BIT_ULL(b); 514725a418SEugenio Pérez ok = false; 524725a418SEugenio Pérez } 534725a418SEugenio Pérez } 544725a418SEugenio Pérez } 554725a418SEugenio Pérez 564725a418SEugenio Pérez if (!ok) { 574725a418SEugenio Pérez error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64 584725a418SEugenio Pérez ", ok: 0x%"PRIx64, features, svq_features); 594725a418SEugenio Pérez } 604725a418SEugenio Pérez return ok; 614725a418SEugenio Pérez } 624725a418SEugenio Pérez 634725a418SEugenio Pérez /** 64100890f7SEugenio Pérez * Number of descriptors that the SVQ can make available from the guest. 
65dff4426fSEugenio Pérez * 66100890f7SEugenio Pérez * @svq: The svq 67dff4426fSEugenio Pérez */ 68100890f7SEugenio Pérez static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) 69dff4426fSEugenio Pérez { 70100890f7SEugenio Pérez return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); 71100890f7SEugenio Pérez } 72100890f7SEugenio Pérez 7334e3c94eSEugenio Pérez /** 7434e3c94eSEugenio Pérez * Translate addresses between the qemu's virtual address and the SVQ IOVA 7534e3c94eSEugenio Pérez * 7634e3c94eSEugenio Pérez * @svq: Shadow VirtQueue 7734e3c94eSEugenio Pérez * @vaddr: Translated IOVA addresses 7834e3c94eSEugenio Pérez * @iovec: Source qemu's VA addresses 7934e3c94eSEugenio Pérez * @num: Length of iovec and minimum length of vaddr 8034e3c94eSEugenio Pérez */ 8134e3c94eSEugenio Pérez static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, 8234e3c94eSEugenio Pérez hwaddr *addrs, const struct iovec *iovec, 8334e3c94eSEugenio Pérez size_t num) 8434e3c94eSEugenio Pérez { 8534e3c94eSEugenio Pérez if (num == 0) { 8634e3c94eSEugenio Pérez return true; 8734e3c94eSEugenio Pérez } 8834e3c94eSEugenio Pérez 8934e3c94eSEugenio Pérez for (size_t i = 0; i < num; ++i) { 9034e3c94eSEugenio Pérez DMAMap needle = { 9134e3c94eSEugenio Pérez .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, 9234e3c94eSEugenio Pérez .size = iovec[i].iov_len, 9334e3c94eSEugenio Pérez }; 9434e3c94eSEugenio Pérez Int128 needle_last, map_last; 9534e3c94eSEugenio Pérez size_t off; 9634e3c94eSEugenio Pérez 9734e3c94eSEugenio Pérez const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); 9834e3c94eSEugenio Pérez /* 9934e3c94eSEugenio Pérez * Map cannot be NULL since iova map contains all guest space and 10034e3c94eSEugenio Pérez * qemu already has a physical address mapped 10134e3c94eSEugenio Pérez */ 10234e3c94eSEugenio Pérez if (unlikely(!map)) { 10334e3c94eSEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 10434e3c94eSEugenio Pérez "Invalid 
address 0x%"HWADDR_PRIx" given by guest", 10534e3c94eSEugenio Pérez needle.translated_addr); 10634e3c94eSEugenio Pérez return false; 10734e3c94eSEugenio Pérez } 10834e3c94eSEugenio Pérez 10934e3c94eSEugenio Pérez off = needle.translated_addr - map->translated_addr; 11034e3c94eSEugenio Pérez addrs[i] = map->iova + off; 11134e3c94eSEugenio Pérez 11234e3c94eSEugenio Pérez needle_last = int128_add(int128_make64(needle.translated_addr), 11334e3c94eSEugenio Pérez int128_make64(iovec[i].iov_len)); 11434e3c94eSEugenio Pérez map_last = int128_make64(map->translated_addr + map->size); 11534e3c94eSEugenio Pérez if (unlikely(int128_gt(needle_last, map_last))) { 11634e3c94eSEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 11734e3c94eSEugenio Pérez "Guest buffer expands over iova range"); 11834e3c94eSEugenio Pérez return false; 11934e3c94eSEugenio Pérez } 12034e3c94eSEugenio Pérez } 12134e3c94eSEugenio Pérez 12234e3c94eSEugenio Pérez return true; 12334e3c94eSEugenio Pérez } 12434e3c94eSEugenio Pérez 125009c2549SEugenio Pérez /** 126009c2549SEugenio Pérez * Write descriptors to SVQ vring 127009c2549SEugenio Pérez * 128009c2549SEugenio Pérez * @svq: The shadow virtqueue 129009c2549SEugenio Pérez * @sg: Cache for hwaddr 130009c2549SEugenio Pérez * @iovec: The iovec from the guest 131009c2549SEugenio Pérez * @num: iovec length 132009c2549SEugenio Pérez * @more_descs: True if more descriptors come in the chain 133009c2549SEugenio Pérez * @write: True if they are writeable descriptors 134009c2549SEugenio Pérez * 135009c2549SEugenio Pérez * Return true if success, false otherwise and print error. 
136009c2549SEugenio Pérez */ 137009c2549SEugenio Pérez static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, 138100890f7SEugenio Pérez const struct iovec *iovec, size_t num, 139100890f7SEugenio Pérez bool more_descs, bool write) 140100890f7SEugenio Pérez { 141100890f7SEugenio Pérez uint16_t i = svq->free_head, last = svq->free_head; 142100890f7SEugenio Pérez unsigned n; 143100890f7SEugenio Pérez uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; 144100890f7SEugenio Pérez vring_desc_t *descs = svq->vring.desc; 145009c2549SEugenio Pérez bool ok; 146100890f7SEugenio Pérez 147100890f7SEugenio Pérez if (num == 0) { 148009c2549SEugenio Pérez return true; 149009c2549SEugenio Pérez } 150009c2549SEugenio Pérez 151009c2549SEugenio Pérez ok = vhost_svq_translate_addr(svq, sg, iovec, num); 152009c2549SEugenio Pérez if (unlikely(!ok)) { 153009c2549SEugenio Pérez return false; 154100890f7SEugenio Pérez } 155100890f7SEugenio Pérez 156100890f7SEugenio Pérez for (n = 0; n < num; n++) { 157100890f7SEugenio Pérez if (more_descs || (n + 1 < num)) { 158100890f7SEugenio Pérez descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); 159495fe3a7SEugenio Pérez descs[i].next = cpu_to_le16(svq->desc_next[i]); 160100890f7SEugenio Pérez } else { 161100890f7SEugenio Pérez descs[i].flags = flags; 162100890f7SEugenio Pérez } 16334e3c94eSEugenio Pérez descs[i].addr = cpu_to_le64(sg[n]); 164100890f7SEugenio Pérez descs[i].len = cpu_to_le32(iovec[n].iov_len); 165100890f7SEugenio Pérez 166100890f7SEugenio Pérez last = i; 167495fe3a7SEugenio Pérez i = cpu_to_le16(svq->desc_next[i]); 168100890f7SEugenio Pérez } 169100890f7SEugenio Pérez 170495fe3a7SEugenio Pérez svq->free_head = le16_to_cpu(svq->desc_next[last]); 171009c2549SEugenio Pérez return true; 172100890f7SEugenio Pérez } 173100890f7SEugenio Pérez 174100890f7SEugenio Pérez static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, 1751f46ae65SEugenio Pérez const struct iovec *out_sg, size_t out_num, 
1761f46ae65SEugenio Pérez const struct iovec *in_sg, size_t in_num, 1771f46ae65SEugenio Pérez unsigned *head) 178100890f7SEugenio Pérez { 179100890f7SEugenio Pérez unsigned avail_idx; 180100890f7SEugenio Pérez vring_avail_t *avail = svq->vring.avail; 18134e3c94eSEugenio Pérez bool ok; 1821f46ae65SEugenio Pérez g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); 183100890f7SEugenio Pérez 184100890f7SEugenio Pérez *head = svq->free_head; 185100890f7SEugenio Pérez 186100890f7SEugenio Pérez /* We need some descriptors here */ 1871f46ae65SEugenio Pérez if (unlikely(!out_num && !in_num)) { 188100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 189100890f7SEugenio Pérez "Guest provided element with no descriptors"); 190100890f7SEugenio Pérez return false; 191100890f7SEugenio Pérez } 192100890f7SEugenio Pérez 1931f46ae65SEugenio Pérez ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, 1941f46ae65SEugenio Pérez false); 19534e3c94eSEugenio Pérez if (unlikely(!ok)) { 19634e3c94eSEugenio Pérez return false; 19734e3c94eSEugenio Pérez } 19834e3c94eSEugenio Pérez 1991f46ae65SEugenio Pérez ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); 200009c2549SEugenio Pérez if (unlikely(!ok)) { 201009c2549SEugenio Pérez return false; 202009c2549SEugenio Pérez } 203100890f7SEugenio Pérez 204100890f7SEugenio Pérez /* 205100890f7SEugenio Pérez * Put the entry in the available array (but don't update avail->idx until 206100890f7SEugenio Pérez * they do sync). 
207100890f7SEugenio Pérez */ 208100890f7SEugenio Pérez avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1); 209100890f7SEugenio Pérez avail->ring[avail_idx] = cpu_to_le16(*head); 210100890f7SEugenio Pérez svq->shadow_avail_idx++; 211100890f7SEugenio Pérez 212100890f7SEugenio Pérez /* Update the avail index after write the descriptor */ 213100890f7SEugenio Pérez smp_wmb(); 214100890f7SEugenio Pérez avail->idx = cpu_to_le16(svq->shadow_avail_idx); 215100890f7SEugenio Pérez 216100890f7SEugenio Pérez return true; 217100890f7SEugenio Pérez } 218100890f7SEugenio Pérez 219d93a2405SEugenio Pérez static void vhost_svq_kick(VhostShadowVirtqueue *svq) 220d93a2405SEugenio Pérez { 221d93a2405SEugenio Pérez /* 222d93a2405SEugenio Pérez * We need to expose the available array entries before checking the used 223d93a2405SEugenio Pérez * flags 224d93a2405SEugenio Pérez */ 225d93a2405SEugenio Pérez smp_mb(); 226d93a2405SEugenio Pérez if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { 227d93a2405SEugenio Pérez return; 228d93a2405SEugenio Pérez } 229d93a2405SEugenio Pérez 230d93a2405SEugenio Pérez event_notifier_set(&svq->hdev_kick); 231d93a2405SEugenio Pérez } 232d93a2405SEugenio Pérez 2335181db13SEugenio Pérez /** 2345181db13SEugenio Pérez * Add an element to a SVQ. 2355181db13SEugenio Pérez * 2365181db13SEugenio Pérez * The caller must check that there is enough slots for the new element. It 237f20b70ebSEugenio Pérez * takes ownership of the element: In case of failure not ENOSPC, it is free. 
238f20b70ebSEugenio Pérez * 239f20b70ebSEugenio Pérez * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full 2405181db13SEugenio Pérez */ 2411f46ae65SEugenio Pérez static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, 2421f46ae65SEugenio Pérez size_t out_num, const struct iovec *in_sg, 2431f46ae65SEugenio Pérez size_t in_num, VirtQueueElement *elem) 244100890f7SEugenio Pérez { 245100890f7SEugenio Pérez unsigned qemu_head; 2461f46ae65SEugenio Pérez unsigned ndescs = in_num + out_num; 247f20b70ebSEugenio Pérez bool ok; 248f20b70ebSEugenio Pérez 249f20b70ebSEugenio Pérez if (unlikely(ndescs > vhost_svq_available_slots(svq))) { 250f20b70ebSEugenio Pérez return -ENOSPC; 251f20b70ebSEugenio Pérez } 252f20b70ebSEugenio Pérez 2531f46ae65SEugenio Pérez ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); 254100890f7SEugenio Pérez if (unlikely(!ok)) { 2555181db13SEugenio Pérez g_free(elem); 256f20b70ebSEugenio Pérez return -EINVAL; 257100890f7SEugenio Pérez } 258100890f7SEugenio Pérez 2599e87868fSEugenio Pérez svq->desc_state[qemu_head].elem = elem; 260ac4cfdc6SEugenio Pérez svq->desc_state[qemu_head].ndescs = ndescs; 26198b5adefSEugenio Pérez vhost_svq_kick(svq); 262f20b70ebSEugenio Pérez return 0; 263100890f7SEugenio Pérez } 264100890f7SEugenio Pérez 2651f46ae65SEugenio Pérez /* Convenience wrapper to add a guest's element to SVQ */ 2661f46ae65SEugenio Pérez static int vhost_svq_add_element(VhostShadowVirtqueue *svq, 2671f46ae65SEugenio Pérez VirtQueueElement *elem) 2681f46ae65SEugenio Pérez { 2691f46ae65SEugenio Pérez return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, 2701f46ae65SEugenio Pérez elem->in_num, elem); 2711f46ae65SEugenio Pérez } 2721f46ae65SEugenio Pérez 273dff4426fSEugenio Pérez /** 274100890f7SEugenio Pérez * Forward available buffers. 
275100890f7SEugenio Pérez * 276100890f7SEugenio Pérez * @svq: Shadow VirtQueue 277100890f7SEugenio Pérez * 278100890f7SEugenio Pérez * Note that this function does not guarantee that all guest's available 279100890f7SEugenio Pérez * buffers are available to the device in SVQ avail ring. The guest may have 280100890f7SEugenio Pérez * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in 281100890f7SEugenio Pérez * qemu vaddr. 282100890f7SEugenio Pérez * 283100890f7SEugenio Pérez * If that happens, guest's kick notifications will be disabled until the 284100890f7SEugenio Pérez * device uses some buffers. 285100890f7SEugenio Pérez */ 286100890f7SEugenio Pérez static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) 287100890f7SEugenio Pérez { 288100890f7SEugenio Pérez /* Clear event notifier */ 289100890f7SEugenio Pérez event_notifier_test_and_clear(&svq->svq_kick); 290100890f7SEugenio Pérez 291100890f7SEugenio Pérez /* Forward to the device as many available buffers as possible */ 292100890f7SEugenio Pérez do { 293100890f7SEugenio Pérez virtio_queue_set_notification(svq->vq, false); 294100890f7SEugenio Pérez 295100890f7SEugenio Pérez while (true) { 296100890f7SEugenio Pérez VirtQueueElement *elem; 297f20b70ebSEugenio Pérez int r; 298100890f7SEugenio Pérez 299100890f7SEugenio Pérez if (svq->next_guest_avail_elem) { 300100890f7SEugenio Pérez elem = g_steal_pointer(&svq->next_guest_avail_elem); 301100890f7SEugenio Pérez } else { 302100890f7SEugenio Pérez elem = virtqueue_pop(svq->vq, sizeof(*elem)); 303100890f7SEugenio Pérez } 304100890f7SEugenio Pérez 305100890f7SEugenio Pérez if (!elem) { 306100890f7SEugenio Pérez break; 307100890f7SEugenio Pérez } 308100890f7SEugenio Pérez 3091f46ae65SEugenio Pérez r = vhost_svq_add_element(svq, elem); 310f20b70ebSEugenio Pérez if (unlikely(r != 0)) { 311f20b70ebSEugenio Pérez if (r == -ENOSPC) { 312100890f7SEugenio Pérez /* 313f20b70ebSEugenio Pérez * This condition is possible since a contiguous buffer in 
314f20b70ebSEugenio Pérez * GPA does not imply a contiguous buffer in qemu's VA 315f20b70ebSEugenio Pérez * scatter-gather segments. If that happens, the buffer 316f20b70ebSEugenio Pérez * exposed to the device needs to be a chain of descriptors 317f20b70ebSEugenio Pérez * at this moment. 318100890f7SEugenio Pérez * 319100890f7SEugenio Pérez * SVQ cannot hold more available buffers if we are here: 320f20b70ebSEugenio Pérez * queue the current guest descriptor and ignore kicks 321100890f7SEugenio Pérez * until some elements are used. 322100890f7SEugenio Pérez */ 323100890f7SEugenio Pérez svq->next_guest_avail_elem = elem; 324100890f7SEugenio Pérez } 325100890f7SEugenio Pérez 326f20b70ebSEugenio Pérez /* VQ is full or broken, just return and ignore kicks */ 327100890f7SEugenio Pérez return; 328100890f7SEugenio Pérez } 329100890f7SEugenio Pérez } 330100890f7SEugenio Pérez 331100890f7SEugenio Pérez virtio_queue_set_notification(svq->vq, true); 332100890f7SEugenio Pérez } while (!virtio_queue_empty(svq->vq)); 333100890f7SEugenio Pérez } 334100890f7SEugenio Pérez 335100890f7SEugenio Pérez /** 336100890f7SEugenio Pérez * Handle guest's kick. 337100890f7SEugenio Pérez * 338100890f7SEugenio Pérez * @n: guest kick event notifier, the one that guest set to notify svq. 
339100890f7SEugenio Pérez */ 340100890f7SEugenio Pérez static void vhost_handle_guest_kick_notifier(EventNotifier *n) 341100890f7SEugenio Pérez { 342100890f7SEugenio Pérez VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick); 343100890f7SEugenio Pérez event_notifier_test_and_clear(n); 344100890f7SEugenio Pérez vhost_handle_guest_kick(svq); 345100890f7SEugenio Pérez } 346100890f7SEugenio Pérez 347100890f7SEugenio Pérez static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) 348100890f7SEugenio Pérez { 349c381abc3SEugenio Pérez uint16_t *used_idx = &svq->vring.used->idx; 350100890f7SEugenio Pérez if (svq->last_used_idx != svq->shadow_used_idx) { 351100890f7SEugenio Pérez return true; 352100890f7SEugenio Pérez } 353100890f7SEugenio Pérez 354c381abc3SEugenio Pérez svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); 355100890f7SEugenio Pérez 356100890f7SEugenio Pérez return svq->last_used_idx != svq->shadow_used_idx; 357100890f7SEugenio Pérez } 358100890f7SEugenio Pérez 359100890f7SEugenio Pérez /** 360100890f7SEugenio Pérez * Enable vhost device calls after disable them. 361100890f7SEugenio Pérez * 362100890f7SEugenio Pérez * @svq: The svq 363100890f7SEugenio Pérez * 364100890f7SEugenio Pérez * It returns false if there are pending used buffers from the vhost device, 365100890f7SEugenio Pérez * avoiding the possible races between SVQ checking for more work and enabling 366100890f7SEugenio Pérez * callbacks. True if SVQ used vring has no more pending buffers. 
367100890f7SEugenio Pérez */ 368100890f7SEugenio Pérez static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) 369100890f7SEugenio Pérez { 370100890f7SEugenio Pérez svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); 371100890f7SEugenio Pérez /* Make sure the flag is written before the read of used_idx */ 372100890f7SEugenio Pérez smp_mb(); 373100890f7SEugenio Pérez return !vhost_svq_more_used(svq); 374100890f7SEugenio Pérez } 375100890f7SEugenio Pérez 376100890f7SEugenio Pérez static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) 377100890f7SEugenio Pérez { 378100890f7SEugenio Pérez svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); 379100890f7SEugenio Pérez } 380100890f7SEugenio Pérez 38181abfa57SEugenio Pérez static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, 38281abfa57SEugenio Pérez uint16_t num, uint16_t i) 38381abfa57SEugenio Pérez { 38481abfa57SEugenio Pérez for (uint16_t j = 0; j < (num - 1); ++j) { 38581abfa57SEugenio Pérez i = le16_to_cpu(svq->desc_next[i]); 38681abfa57SEugenio Pérez } 38781abfa57SEugenio Pérez 38881abfa57SEugenio Pérez return i; 38981abfa57SEugenio Pérez } 39081abfa57SEugenio Pérez 391100890f7SEugenio Pérez static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, 392100890f7SEugenio Pérez uint32_t *len) 393100890f7SEugenio Pérez { 394100890f7SEugenio Pérez const vring_used_t *used = svq->vring.used; 395100890f7SEugenio Pérez vring_used_elem_t used_elem; 39681abfa57SEugenio Pérez uint16_t last_used, last_used_chain, num; 397100890f7SEugenio Pérez 398100890f7SEugenio Pérez if (!vhost_svq_more_used(svq)) { 399100890f7SEugenio Pérez return NULL; 400100890f7SEugenio Pérez } 401100890f7SEugenio Pérez 402100890f7SEugenio Pérez /* Only get used array entries after they have been exposed by dev */ 403100890f7SEugenio Pérez smp_rmb(); 404100890f7SEugenio Pérez last_used = svq->last_used_idx & (svq->vring.num - 1); 405100890f7SEugenio Pérez 
used_elem.id = le32_to_cpu(used->ring[last_used].id); 406100890f7SEugenio Pérez used_elem.len = le32_to_cpu(used->ring[last_used].len); 407100890f7SEugenio Pérez 408100890f7SEugenio Pérez svq->last_used_idx++; 409100890f7SEugenio Pérez if (unlikely(used_elem.id >= svq->vring.num)) { 410100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used", 411100890f7SEugenio Pérez svq->vdev->name, used_elem.id); 412100890f7SEugenio Pérez return NULL; 413100890f7SEugenio Pérez } 414100890f7SEugenio Pérez 4159e87868fSEugenio Pérez if (unlikely(!svq->desc_state[used_elem.id].elem)) { 416100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 417100890f7SEugenio Pérez "Device %s says index %u is used, but it was not available", 418100890f7SEugenio Pérez svq->vdev->name, used_elem.id); 419100890f7SEugenio Pérez return NULL; 420100890f7SEugenio Pérez } 421100890f7SEugenio Pérez 422ac4cfdc6SEugenio Pérez num = svq->desc_state[used_elem.id].ndescs; 42381abfa57SEugenio Pérez last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); 42481abfa57SEugenio Pérez svq->desc_next[last_used_chain] = svq->free_head; 425100890f7SEugenio Pérez svq->free_head = used_elem.id; 426100890f7SEugenio Pérez 427100890f7SEugenio Pérez *len = used_elem.len; 4289e87868fSEugenio Pérez return g_steal_pointer(&svq->desc_state[used_elem.id].elem); 429100890f7SEugenio Pérez } 430100890f7SEugenio Pérez 431*432efd14SEugenio Pérez /** 432*432efd14SEugenio Pérez * Push an element to SVQ, returning it to the guest. 
433*432efd14SEugenio Pérez */ 434*432efd14SEugenio Pérez void vhost_svq_push_elem(VhostShadowVirtqueue *svq, 435*432efd14SEugenio Pérez const VirtQueueElement *elem, uint32_t len) 436*432efd14SEugenio Pérez { 437*432efd14SEugenio Pérez virtqueue_push(svq->vq, elem, len); 438*432efd14SEugenio Pérez if (svq->next_guest_avail_elem) { 439*432efd14SEugenio Pérez /* 440*432efd14SEugenio Pérez * Avail ring was full when vhost_svq_flush was called, so it's a 441*432efd14SEugenio Pérez * good moment to make more descriptors available if possible. 442*432efd14SEugenio Pérez */ 443*432efd14SEugenio Pérez vhost_handle_guest_kick(svq); 444*432efd14SEugenio Pérez } 445*432efd14SEugenio Pérez } 446*432efd14SEugenio Pérez 447100890f7SEugenio Pérez static void vhost_svq_flush(VhostShadowVirtqueue *svq, 448100890f7SEugenio Pérez bool check_for_avail_queue) 449100890f7SEugenio Pérez { 450100890f7SEugenio Pérez VirtQueue *vq = svq->vq; 451100890f7SEugenio Pérez 452100890f7SEugenio Pérez /* Forward as many used buffers as possible. 
*/ 453100890f7SEugenio Pérez do { 454100890f7SEugenio Pérez unsigned i = 0; 455100890f7SEugenio Pérez 456100890f7SEugenio Pérez vhost_svq_disable_notification(svq); 457100890f7SEugenio Pérez while (true) { 458100890f7SEugenio Pérez uint32_t len; 459100890f7SEugenio Pérez g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); 460100890f7SEugenio Pérez if (!elem) { 461100890f7SEugenio Pérez break; 462100890f7SEugenio Pérez } 463100890f7SEugenio Pérez 464100890f7SEugenio Pérez if (unlikely(i >= svq->vring.num)) { 465100890f7SEugenio Pérez qemu_log_mask(LOG_GUEST_ERROR, 466100890f7SEugenio Pérez "More than %u used buffers obtained in a %u size SVQ", 467100890f7SEugenio Pérez i, svq->vring.num); 468100890f7SEugenio Pérez virtqueue_fill(vq, elem, len, i); 469100890f7SEugenio Pérez virtqueue_flush(vq, i); 470100890f7SEugenio Pérez return; 471100890f7SEugenio Pérez } 472100890f7SEugenio Pérez virtqueue_fill(vq, elem, len, i++); 473100890f7SEugenio Pérez } 474100890f7SEugenio Pérez 475100890f7SEugenio Pérez virtqueue_flush(vq, i); 476100890f7SEugenio Pérez event_notifier_set(&svq->svq_call); 477100890f7SEugenio Pérez 478100890f7SEugenio Pérez if (check_for_avail_queue && svq->next_guest_avail_elem) { 479100890f7SEugenio Pérez /* 480100890f7SEugenio Pérez * Avail ring was full when vhost_svq_flush was called, so it's a 481100890f7SEugenio Pérez * good moment to make more descriptors available if possible. 482100890f7SEugenio Pérez */ 483100890f7SEugenio Pérez vhost_handle_guest_kick(svq); 484100890f7SEugenio Pérez } 485100890f7SEugenio Pérez } while (!vhost_svq_enable_notification(svq)); 486100890f7SEugenio Pérez } 487100890f7SEugenio Pérez 488100890f7SEugenio Pérez /** 489100890f7SEugenio Pérez * Forward used buffers. 490a8ac8858SEugenio Pérez * 491a8ac8858SEugenio Pérez * @n: hdev call event notifier, the one that device set to notify svq. 
492100890f7SEugenio Pérez * 493100890f7SEugenio Pérez * Note that we are not making any buffers available in the loop, there is no 494100890f7SEugenio Pérez * way that it runs more than virtqueue size times. 495a8ac8858SEugenio Pérez */ 496a8ac8858SEugenio Pérez static void vhost_svq_handle_call(EventNotifier *n) 497a8ac8858SEugenio Pérez { 498a8ac8858SEugenio Pérez VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, 499a8ac8858SEugenio Pérez hdev_call); 500a8ac8858SEugenio Pérez event_notifier_test_and_clear(n); 501100890f7SEugenio Pérez vhost_svq_flush(svq, true); 502a8ac8858SEugenio Pérez } 503a8ac8858SEugenio Pérez 504a8ac8858SEugenio Pérez /** 505a8ac8858SEugenio Pérez * Set the call notifier for the SVQ to call the guest 506a8ac8858SEugenio Pérez * 507a8ac8858SEugenio Pérez * @svq: Shadow virtqueue 508a8ac8858SEugenio Pérez * @call_fd: call notifier 509a8ac8858SEugenio Pérez * 510a8ac8858SEugenio Pérez * Called on BQL context. 511a8ac8858SEugenio Pérez */ 512a8ac8858SEugenio Pérez void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) 513a8ac8858SEugenio Pérez { 514a8ac8858SEugenio Pérez if (call_fd == VHOST_FILE_UNBIND) { 515a8ac8858SEugenio Pérez /* 516a8ac8858SEugenio Pérez * Fail event_notifier_set if called handling device call. 517a8ac8858SEugenio Pérez * 518a8ac8858SEugenio Pérez * SVQ still needs device notifications, since it needs to keep 519a8ac8858SEugenio Pérez * forwarding used buffers even with the unbind. 520a8ac8858SEugenio Pérez */ 521a8ac8858SEugenio Pérez memset(&svq->svq_call, 0, sizeof(svq->svq_call)); 522a8ac8858SEugenio Pérez } else { 523a8ac8858SEugenio Pérez event_notifier_init_fd(&svq->svq_call, call_fd); 524a8ac8858SEugenio Pérez } 525a8ac8858SEugenio Pérez } 526a8ac8858SEugenio Pérez 527a8ac8858SEugenio Pérez /** 528dafb34c9SEugenio Pérez * Get the shadow vq vring address. 
529dafb34c9SEugenio Pérez * @svq: Shadow virtqueue 530dafb34c9SEugenio Pérez * @addr: Destination to store address 531dafb34c9SEugenio Pérez */ 532dafb34c9SEugenio Pérez void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, 533dafb34c9SEugenio Pérez struct vhost_vring_addr *addr) 534dafb34c9SEugenio Pérez { 53534e3c94eSEugenio Pérez addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc; 53634e3c94eSEugenio Pérez addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail; 53734e3c94eSEugenio Pérez addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used; 538dafb34c9SEugenio Pérez } 539dafb34c9SEugenio Pérez 540dafb34c9SEugenio Pérez size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) 541dafb34c9SEugenio Pérez { 542dafb34c9SEugenio Pérez size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; 543dafb34c9SEugenio Pérez size_t avail_size = offsetof(vring_avail_t, ring) + 544dafb34c9SEugenio Pérez sizeof(uint16_t) * svq->vring.num; 545dafb34c9SEugenio Pérez 5468e3b0cbbSMarc-André Lureau return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); 547dafb34c9SEugenio Pérez } 548dafb34c9SEugenio Pérez 549dafb34c9SEugenio Pérez size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) 550dafb34c9SEugenio Pérez { 551dafb34c9SEugenio Pérez size_t used_size = offsetof(vring_used_t, ring) + 552dafb34c9SEugenio Pérez sizeof(vring_used_elem_t) * svq->vring.num; 5538e3b0cbbSMarc-André Lureau return ROUND_UP(used_size, qemu_real_host_page_size()); 554dafb34c9SEugenio Pérez } 555dafb34c9SEugenio Pérez 556dafb34c9SEugenio Pérez /** 557dff4426fSEugenio Pérez * Set a new file descriptor for the guest to kick the SVQ and notify for avail 558dff4426fSEugenio Pérez * 559dff4426fSEugenio Pérez * @svq: The svq 560dff4426fSEugenio Pérez * @svq_kick_fd: The svq kick fd 561dff4426fSEugenio Pérez * 562dff4426fSEugenio Pérez * Note that the SVQ will never close the old file descriptor. 
563dff4426fSEugenio Pérez */ 564dff4426fSEugenio Pérez void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) 565dff4426fSEugenio Pérez { 566dff4426fSEugenio Pérez EventNotifier *svq_kick = &svq->svq_kick; 567dff4426fSEugenio Pérez bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick); 568dff4426fSEugenio Pérez bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND; 569dff4426fSEugenio Pérez 570dff4426fSEugenio Pérez if (poll_stop) { 571dff4426fSEugenio Pérez event_notifier_set_handler(svq_kick, NULL); 572dff4426fSEugenio Pérez } 573dff4426fSEugenio Pérez 574dff4426fSEugenio Pérez /* 575dff4426fSEugenio Pérez * event_notifier_set_handler already checks for guest's notifications if 576dff4426fSEugenio Pérez * they arrive at the new file descriptor in the switch, so there is no 577dff4426fSEugenio Pérez * need to explicitly check for them. 578dff4426fSEugenio Pérez */ 579dff4426fSEugenio Pérez if (poll_start) { 580dff4426fSEugenio Pérez event_notifier_init_fd(svq_kick, svq_kick_fd); 581dff4426fSEugenio Pérez event_notifier_set(svq_kick); 582100890f7SEugenio Pérez event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); 583100890f7SEugenio Pérez } 584100890f7SEugenio Pérez } 585100890f7SEugenio Pérez 586100890f7SEugenio Pérez /** 587100890f7SEugenio Pérez * Start the shadow virtqueue operation. 
588100890f7SEugenio Pérez * 589100890f7SEugenio Pérez * @svq: Shadow Virtqueue 590100890f7SEugenio Pérez * @vdev: VirtIO device 591100890f7SEugenio Pérez * @vq: Virtqueue to shadow 592100890f7SEugenio Pérez */ 593100890f7SEugenio Pérez void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, 594100890f7SEugenio Pérez VirtQueue *vq) 595100890f7SEugenio Pérez { 596100890f7SEugenio Pérez size_t desc_size, driver_size, device_size; 597100890f7SEugenio Pérez 598100890f7SEugenio Pérez svq->next_guest_avail_elem = NULL; 599100890f7SEugenio Pérez svq->shadow_avail_idx = 0; 600100890f7SEugenio Pérez svq->shadow_used_idx = 0; 601100890f7SEugenio Pérez svq->last_used_idx = 0; 602100890f7SEugenio Pérez svq->vdev = vdev; 603100890f7SEugenio Pérez svq->vq = vq; 604100890f7SEugenio Pérez 605100890f7SEugenio Pérez svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); 606100890f7SEugenio Pérez driver_size = vhost_svq_driver_area_size(svq); 607100890f7SEugenio Pérez device_size = vhost_svq_device_area_size(svq); 6088e3b0cbbSMarc-André Lureau svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); 609100890f7SEugenio Pérez desc_size = sizeof(vring_desc_t) * svq->vring.num; 610100890f7SEugenio Pérez svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); 611100890f7SEugenio Pérez memset(svq->vring.desc, 0, driver_size); 6128e3b0cbbSMarc-André Lureau svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); 613100890f7SEugenio Pérez memset(svq->vring.used, 0, device_size); 6149e87868fSEugenio Pérez svq->desc_state = g_new0(SVQDescState, svq->vring.num); 615495fe3a7SEugenio Pérez svq->desc_next = g_new0(uint16_t, svq->vring.num); 616100890f7SEugenio Pérez for (unsigned i = 0; i < svq->vring.num - 1; i++) { 617495fe3a7SEugenio Pérez svq->desc_next[i] = cpu_to_le16(i + 1); 618dff4426fSEugenio Pérez } 619dff4426fSEugenio Pérez } 620dff4426fSEugenio Pérez 621dff4426fSEugenio Pérez /** 622dff4426fSEugenio Pérez * Stop 
the shadow virtqueue operation. 623dff4426fSEugenio Pérez * @svq: Shadow Virtqueue 624dff4426fSEugenio Pérez */ 625dff4426fSEugenio Pérez void vhost_svq_stop(VhostShadowVirtqueue *svq) 626dff4426fSEugenio Pérez { 627dff4426fSEugenio Pérez event_notifier_set_handler(&svq->svq_kick, NULL); 628100890f7SEugenio Pérez g_autofree VirtQueueElement *next_avail_elem = NULL; 629100890f7SEugenio Pérez 630100890f7SEugenio Pérez if (!svq->vq) { 631100890f7SEugenio Pérez return; 632100890f7SEugenio Pérez } 633100890f7SEugenio Pérez 634100890f7SEugenio Pérez /* Send all pending used descriptors to guest */ 635100890f7SEugenio Pérez vhost_svq_flush(svq, false); 636100890f7SEugenio Pérez 637100890f7SEugenio Pérez for (unsigned i = 0; i < svq->vring.num; ++i) { 638100890f7SEugenio Pérez g_autofree VirtQueueElement *elem = NULL; 6399e87868fSEugenio Pérez elem = g_steal_pointer(&svq->desc_state[i].elem); 640100890f7SEugenio Pérez if (elem) { 641100890f7SEugenio Pérez virtqueue_detach_element(svq->vq, elem, 0); 642100890f7SEugenio Pérez } 643100890f7SEugenio Pérez } 644100890f7SEugenio Pérez 645100890f7SEugenio Pérez next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem); 646100890f7SEugenio Pérez if (next_avail_elem) { 647100890f7SEugenio Pérez virtqueue_detach_element(svq->vq, next_avail_elem, 0); 648100890f7SEugenio Pérez } 649100890f7SEugenio Pérez svq->vq = NULL; 650495fe3a7SEugenio Pérez g_free(svq->desc_next); 6519e87868fSEugenio Pérez g_free(svq->desc_state); 652100890f7SEugenio Pérez qemu_vfree(svq->vring.desc); 653100890f7SEugenio Pérez qemu_vfree(svq->vring.used); 654dff4426fSEugenio Pérez } 65510857ec0SEugenio Pérez 65610857ec0SEugenio Pérez /** 65710857ec0SEugenio Pérez * Creates vhost shadow virtqueue, and instructs the vhost device to use the 65810857ec0SEugenio Pérez * shadow methods and file descriptors. 
65910857ec0SEugenio Pérez * 66034e3c94eSEugenio Pérez * @iova_tree: Tree to perform descriptors translations 66134e3c94eSEugenio Pérez * 66210857ec0SEugenio Pérez * Returns the new virtqueue or NULL. 66310857ec0SEugenio Pérez * 66410857ec0SEugenio Pérez * In case of error, reason is reported through error_report. 66510857ec0SEugenio Pérez */ 66634e3c94eSEugenio Pérez VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) 66710857ec0SEugenio Pérez { 66810857ec0SEugenio Pérez g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); 66910857ec0SEugenio Pérez int r; 67010857ec0SEugenio Pérez 67110857ec0SEugenio Pérez r = event_notifier_init(&svq->hdev_kick, 0); 67210857ec0SEugenio Pérez if (r != 0) { 67310857ec0SEugenio Pérez error_report("Couldn't create kick event notifier: %s (%d)", 67410857ec0SEugenio Pérez g_strerror(errno), errno); 67510857ec0SEugenio Pérez goto err_init_hdev_kick; 67610857ec0SEugenio Pérez } 67710857ec0SEugenio Pérez 67810857ec0SEugenio Pérez r = event_notifier_init(&svq->hdev_call, 0); 67910857ec0SEugenio Pérez if (r != 0) { 68010857ec0SEugenio Pérez error_report("Couldn't create call event notifier: %s (%d)", 68110857ec0SEugenio Pérez g_strerror(errno), errno); 68210857ec0SEugenio Pérez goto err_init_hdev_call; 68310857ec0SEugenio Pérez } 68410857ec0SEugenio Pérez 685dff4426fSEugenio Pérez event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); 686a8ac8858SEugenio Pérez event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); 68734e3c94eSEugenio Pérez svq->iova_tree = iova_tree; 68810857ec0SEugenio Pérez return g_steal_pointer(&svq); 68910857ec0SEugenio Pérez 69010857ec0SEugenio Pérez err_init_hdev_call: 69110857ec0SEugenio Pérez event_notifier_cleanup(&svq->hdev_kick); 69210857ec0SEugenio Pérez 69310857ec0SEugenio Pérez err_init_hdev_kick: 69410857ec0SEugenio Pérez return NULL; 69510857ec0SEugenio Pérez } 69610857ec0SEugenio Pérez 69710857ec0SEugenio Pérez /** 69810857ec0SEugenio Pérez * Free the resources 
of the shadow virtqueue.
 *
 * @pvq: gpointer to SVQ so it can be used by autofree functions. NULL is
 *       accepted and ignored.
 */
void vhost_svq_free(gpointer pvq)
{
    VhostShadowVirtqueue *vq = pvq;

    /* Behave like g_free(): being handed nothing is not an error */
    if (!vq) {
        return;
    }

    /* Stop first so that anything still allocated for operation is released */
    vhost_svq_stop(vq);
    event_notifier_cleanup(&vq->hdev_kick);
    /* Unset the call handler before its notifier is cleaned up */
    event_notifier_set_handler(&vq->hdev_call, NULL);
    event_notifier_cleanup(&vq->hdev_call);
    g_free(vq);
}