xref: /qemu/subprojects/libvduse/libvduse.c (revision 3cc72cdb)
1a6caeee8SXie Yongji /*
2a6caeee8SXie Yongji  * VDUSE (vDPA Device in Userspace) library
3a6caeee8SXie Yongji  *
4a6caeee8SXie Yongji  * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
5a6caeee8SXie Yongji  *   Portions of codes and concepts borrowed from libvhost-user.c, so:
6a6caeee8SXie Yongji  *     Copyright IBM, Corp. 2007
7a6caeee8SXie Yongji  *     Copyright (c) 2016 Red Hat, Inc.
8a6caeee8SXie Yongji  *
9a6caeee8SXie Yongji  * Author:
10a6caeee8SXie Yongji  *   Xie Yongji <xieyongji@bytedance.com>
11a6caeee8SXie Yongji  *   Anthony Liguori <aliguori@us.ibm.com>
12a6caeee8SXie Yongji  *   Marc-André Lureau <mlureau@redhat.com>
13a6caeee8SXie Yongji  *   Victor Kaplansky <victork@redhat.com>
14a6caeee8SXie Yongji  *
15a6caeee8SXie Yongji  * This work is licensed under the terms of the GNU GPL, version 2 or
16a6caeee8SXie Yongji  * later.  See the COPYING file in the top-level directory.
17a6caeee8SXie Yongji  */
18a6caeee8SXie Yongji 
19518ac428SMarcel Holtmann #ifndef _GNU_SOURCE
20518ac428SMarcel Holtmann #define _GNU_SOURCE
21518ac428SMarcel Holtmann #endif
22518ac428SMarcel Holtmann 
23a6caeee8SXie Yongji #include <stdlib.h>
24a6caeee8SXie Yongji #include <stdio.h>
25a6caeee8SXie Yongji #include <stdbool.h>
26a6caeee8SXie Yongji #include <stddef.h>
27a6caeee8SXie Yongji #include <errno.h>
28a6caeee8SXie Yongji #include <string.h>
29a6caeee8SXie Yongji #include <assert.h>
30a6caeee8SXie Yongji #include <endian.h>
31a6caeee8SXie Yongji #include <unistd.h>
32a6caeee8SXie Yongji #include <limits.h>
33a6caeee8SXie Yongji #include <fcntl.h>
34a6caeee8SXie Yongji #include <inttypes.h>
35a6caeee8SXie Yongji 
36a6caeee8SXie Yongji #include <sys/ioctl.h>
37a6caeee8SXie Yongji #include <sys/eventfd.h>
38a6caeee8SXie Yongji #include <sys/mman.h>
39a6caeee8SXie Yongji 
40a6caeee8SXie Yongji #include "include/atomic.h"
41a6caeee8SXie Yongji #include "linux-headers/linux/virtio_ring.h"
42a6caeee8SXie Yongji #include "linux-headers/linux/virtio_config.h"
43a6caeee8SXie Yongji #include "linux-headers/linux/vduse.h"
44a6caeee8SXie Yongji #include "libvduse.h"
45a6caeee8SXie Yongji 
/* NOTE(review): not referenced in the visible part of this file - confirm use */
#define VDUSE_VQ_ALIGN 4096
/* Number of slots in VduseDev.regions[] */
#define MAX_IOVA_REGIONS 256

/* vduse_vq_log_size() rounds each per-queue log up to a multiple of this */
#define LOG_ALIGNMENT 64

/*
 * Largest multiple of m that does not exceed n (integer division).
 * m is evaluated more than once, so pass side-effect-free arguments.
 */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/*
 * Smallest multiple of m that is not below n, built on ALIGN_DOWN.
 * m is evaluated more than once here as well.
 */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))

/* Branch-prediction hint; only defined when nobody else provided one. */
#ifndef unlikely
#define unlikely(x)   __builtin_expect(!!(x), 0)
#endif
60a6caeee8SXie Yongji 
/*
 * Per-descriptor record kept in the inflight log.  The field order and
 * sizes define the layout of that log, so they must not be rearranged.
 */
typedef struct VduseDescStateSplit {
    /* Set to 1 while the descriptor is being processed, 0 otherwise */
    uint8_t inflight;
    /* Explicit filler so that 'next' starts at byte offset 6 */
    uint8_t padding[5];
    /* NOTE(review): not accessed in the visible part of this file */
    uint16_t next;
    /* Value of the queue's running counter when the entry went inflight */
    uint64_t counter;
} VduseDescStateSplit;
67d043e2dbSXie Yongji 
68d043e2dbSXie Yongji typedef struct VduseVirtqLogInflight {
69d043e2dbSXie Yongji     uint64_t features;
70d043e2dbSXie Yongji     uint16_t version;
71d043e2dbSXie Yongji     uint16_t desc_num;
72d043e2dbSXie Yongji     uint16_t last_batch_head;
73d043e2dbSXie Yongji     uint16_t used_idx;
74d043e2dbSXie Yongji     VduseDescStateSplit desc[];
75d043e2dbSXie Yongji } VduseVirtqLogInflight;
76d043e2dbSXie Yongji 
77d043e2dbSXie Yongji typedef struct VduseVirtqLog {
78d043e2dbSXie Yongji     VduseVirtqLogInflight inflight;
79d043e2dbSXie Yongji } VduseVirtqLog;
80d043e2dbSXie Yongji 
/* One entry of a queue's resubmit list, built from the inflight log. */
typedef struct VduseVirtqInflightDesc {
    /* Descriptor number (position inside the log's desc[] array) */
    uint16_t index;
    /* Counter value copied from the matching log entry; used for sorting */
    uint64_t counter;
} VduseVirtqInflightDesc;
85d043e2dbSXie Yongji 
/* Split-ring description: size, the three area addresses and their mappings. */
typedef struct VduseRing {
    /* Number of entries in the ring */
    unsigned int num;
    /* Addresses of the descriptor, avail and used areas */
    uint64_t desc_addr;
    uint64_t avail_addr;
    uint64_t used_addr;
    /* Pointers through which this library reads/writes the three areas */
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
} VduseRing;
95a6caeee8SXie Yongji 
96a6caeee8SXie Yongji struct VduseVirtq {
97a6caeee8SXie Yongji     VduseRing vring;
98a6caeee8SXie Yongji     uint16_t last_avail_idx;
99a6caeee8SXie Yongji     uint16_t shadow_avail_idx;
100a6caeee8SXie Yongji     uint16_t used_idx;
101a6caeee8SXie Yongji     uint16_t signalled_used;
102a6caeee8SXie Yongji     bool signalled_used_valid;
103a6caeee8SXie Yongji     int index;
10485899f8eSMarcel Holtmann     unsigned int inuse;
105a6caeee8SXie Yongji     bool ready;
106a6caeee8SXie Yongji     int fd;
107a6caeee8SXie Yongji     VduseDev *dev;
108d043e2dbSXie Yongji     VduseVirtqInflightDesc *resubmit_list;
109d043e2dbSXie Yongji     uint16_t resubmit_num;
110d043e2dbSXie Yongji     uint64_t counter;
111d043e2dbSXie Yongji     VduseVirtqLog *log;
112a6caeee8SXie Yongji };
113a6caeee8SXie Yongji 
/* One mapped IOVA range; a slot with mmap_addr == 0 is treated as free. */
typedef struct VduseIovaRegion {
    /* First IOVA covered by the region */
    uint64_t iova;
    /* Length of the region in bytes */
    uint64_t size;
    /* Offset of the region's data from the start of the mapping */
    uint64_t mmap_offset;
    /* Address returned by mmap(), stored as an integer */
    uint64_t mmap_addr;
} VduseIovaRegion;
120a6caeee8SXie Yongji 
121a6caeee8SXie Yongji struct VduseDev {
122a6caeee8SXie Yongji     VduseVirtq *vqs;
123a6caeee8SXie Yongji     VduseIovaRegion regions[MAX_IOVA_REGIONS];
124a6caeee8SXie Yongji     int num_regions;
125a6caeee8SXie Yongji     char *name;
126a6caeee8SXie Yongji     uint32_t device_id;
127a6caeee8SXie Yongji     uint32_t vendor_id;
128a6caeee8SXie Yongji     uint16_t num_queues;
129a6caeee8SXie Yongji     uint16_t queue_size;
130a6caeee8SXie Yongji     uint64_t features;
131a6caeee8SXie Yongji     const VduseOps *ops;
132a6caeee8SXie Yongji     int fd;
133a6caeee8SXie Yongji     int ctrl_fd;
134a6caeee8SXie Yongji     void *priv;
135d043e2dbSXie Yongji     void *log;
136a6caeee8SXie Yongji };
137a6caeee8SXie Yongji 
vduse_vq_log_size(uint16_t queue_size)138d043e2dbSXie Yongji static inline size_t vduse_vq_log_size(uint16_t queue_size)
139d043e2dbSXie Yongji {
140d043e2dbSXie Yongji     return ALIGN_UP(sizeof(VduseDescStateSplit) * queue_size +
141d043e2dbSXie Yongji                     sizeof(VduseVirtqLogInflight), LOG_ALIGNMENT);
142d043e2dbSXie Yongji }
143d043e2dbSXie Yongji 
/*
 * Open (creating it with mode 0600 if needed) the file @filename, resize
 * it to @size bytes and map it shared and read/write.
 *
 * Returns the mapping, or MAP_FAILED if open(), ftruncate() or mmap()
 * failed.  The file descriptor is always closed before returning.
 */
static void *vduse_log_get(const char *filename, size_t size)
{
    void *mapping = MAP_FAILED;
    int log_fd = open(filename, O_RDWR | O_CREAT, 0600);

    if (log_fd == -1) {
        return MAP_FAILED;
    }

    if (ftruncate(log_fd, size) != -1) {
        mapping = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       log_fd, 0);
    }

    close(log_fd);
    return mapping;
}
164d043e2dbSXie Yongji 
/*
 * Report whether bit @fbit is set in @features.
 * @fbit must be below 64; this is enforced with assert().
 */
static inline bool has_feature(uint64_t features, unsigned int fbit)
{
    assert(fbit < 64);
    return ((features >> fbit) & 1ULL) != 0;
}
170a6caeee8SXie Yongji 
/* Report whether feature bit @fbit is set in the device's feature word. */
static inline bool vduse_dev_has_feature(VduseDev *dev, unsigned int fbit)
{
    uint64_t dev_features = dev->features;

    return has_feature(dev_features, fbit);
}
175a6caeee8SXie Yongji 
vduse_get_virtio_features(void)176a6caeee8SXie Yongji uint64_t vduse_get_virtio_features(void)
177a6caeee8SXie Yongji {
178a6caeee8SXie Yongji     return (1ULL << VIRTIO_F_IOMMU_PLATFORM) |
179a6caeee8SXie Yongji            (1ULL << VIRTIO_F_VERSION_1) |
180a6caeee8SXie Yongji            (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
181a6caeee8SXie Yongji            (1ULL << VIRTIO_RING_F_EVENT_IDX) |
182a6caeee8SXie Yongji            (1ULL << VIRTIO_RING_F_INDIRECT_DESC);
183a6caeee8SXie Yongji }
184a6caeee8SXie Yongji 
vduse_queue_get_dev(VduseVirtq * vq)185a6caeee8SXie Yongji VduseDev *vduse_queue_get_dev(VduseVirtq *vq)
186a6caeee8SXie Yongji {
187a6caeee8SXie Yongji     return vq->dev;
188a6caeee8SXie Yongji }
189a6caeee8SXie Yongji 
/* Return the file descriptor stored in @vq. */
int vduse_queue_get_fd(VduseVirtq *vq)
{
    int queue_fd = vq->fd;

    return queue_fd;
}
194a6caeee8SXie Yongji 
/* Return the opaque private pointer stored in @dev. */
void *vduse_dev_get_priv(VduseDev *dev)
{
    void *opaque = dev->priv;

    return opaque;
}
199a6caeee8SXie Yongji 
vduse_dev_get_queue(VduseDev * dev,int index)200a6caeee8SXie Yongji VduseVirtq *vduse_dev_get_queue(VduseDev *dev, int index)
201a6caeee8SXie Yongji {
202a6caeee8SXie Yongji     return &dev->vqs[index];
203a6caeee8SXie Yongji }
204a6caeee8SXie Yongji 
/* Return the device file descriptor stored in @dev. */
int vduse_dev_get_fd(VduseDev *dev)
{
    int dev_fd = dev->fd;

    return dev_fd;
}
209a6caeee8SXie Yongji 
/*
 * Issue VDUSE_VQ_INJECT_IRQ on the device fd for queue number @index.
 * Returns whatever ioctl() returns.
 */
static int vduse_inject_irq(VduseDev *dev, int index)
{
    int rc = ioctl(dev->fd, VDUSE_VQ_INJECT_IRQ, &index);

    return rc;
}
214a6caeee8SXie Yongji 
inflight_desc_compare(const void * a,const void * b)215d043e2dbSXie Yongji static int inflight_desc_compare(const void *a, const void *b)
216d043e2dbSXie Yongji {
217d043e2dbSXie Yongji     VduseVirtqInflightDesc *desc0 = (VduseVirtqInflightDesc *)a,
218d043e2dbSXie Yongji                            *desc1 = (VduseVirtqInflightDesc *)b;
219d043e2dbSXie Yongji 
220d043e2dbSXie Yongji     if (desc1->counter > desc0->counter &&
221d043e2dbSXie Yongji         (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
222d043e2dbSXie Yongji         return 1;
223d043e2dbSXie Yongji     }
224d043e2dbSXie Yongji 
225d043e2dbSXie Yongji     return -1;
226d043e2dbSXie Yongji }
227d043e2dbSXie Yongji 
/*
 * Rebuild a queue's runtime state from its inflight log.
 *
 * - Reloads used_idx from the used ring and clears the resubmit state.
 * - If the log's used_idx disagrees with the ring, the entry named by
 *   last_batch_head is marked not-inflight and the log's used_idx is
 *   brought up to date.
 * - Counts the entries marked inflight, derives last_avail_idx and
 *   shadow_avail_idx from that count, and collects those entries into
 *   vq->resubmit_list, sorted with inflight_desc_compare().
 * - Finally injects an interrupt for the queue (result ignored).
 *
 * Returns 0 on success, -1 if last_batch_head is out of range or the
 * resubmit list cannot be allocated.
 */
static int vduse_queue_check_inflights(VduseVirtq *vq)
{
    int i = 0;
    VduseDev *dev = vq->dev;

    vq->used_idx = le16toh(vq->vring.used->idx);
    vq->resubmit_num = 0;
    vq->resubmit_list = NULL;
    vq->counter = 0;

    if (unlikely(vq->log->inflight.used_idx != vq->used_idx)) {
        /*
         * Descriptor numbers are zero-based, so a value equal to
         * VIRTQUEUE_MAX_SIZE is already out of range.
         */
        if (vq->log->inflight.last_batch_head >= VIRTQUEUE_MAX_SIZE) {
            return -1;
        }

        vq->log->inflight.desc[vq->log->inflight.last_batch_head].inflight = 0;

        /* Clear the entry before publishing the new used_idx in the log */
        barrier();

        vq->log->inflight.used_idx = vq->used_idx;
    }

    for (i = 0; i < vq->log->inflight.desc_num; i++) {
        if (vq->log->inflight.desc[i].inflight == 1) {
            vq->inuse++;
        }
    }

    vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;

    if (vq->inuse) {
        vq->resubmit_list = calloc(vq->inuse, sizeof(VduseVirtqInflightDesc));
        if (!vq->resubmit_list) {
            return -1;
        }

        for (i = 0; i < vq->log->inflight.desc_num; i++) {
            /*
             * Use the same test as the counting loop above; otherwise a
             * log holding other non-zero values would yield more entries
             * than resubmit_list has room for.
             */
            if (vq->log->inflight.desc[i].inflight == 1) {
                vq->resubmit_list[vq->resubmit_num].index = i;
                vq->resubmit_list[vq->resubmit_num].counter =
                                        vq->log->inflight.desc[i].counter;
                vq->resubmit_num++;
            }
        }

        if (vq->resubmit_num > 1) {
            qsort(vq->resubmit_list, vq->resubmit_num,
                  sizeof(VduseVirtqInflightDesc), inflight_desc_compare);
        }
        /* Continue numbering after the first entry of the sorted list */
        vq->counter = vq->resubmit_list[0].counter + 1;
    }

    vduse_inject_irq(dev, vq->index);

    return 0;
}
284d043e2dbSXie Yongji 
/*
 * Mark descriptor @desc_idx as inflight in the queue's log.
 * The counter is stored first, then a barrier, then the inflight flag.
 * Always returns 0.
 */
static int vduse_queue_inflight_get(VduseVirtq *vq, int desc_idx)
{
    VduseDescStateSplit *state = &vq->log->inflight.desc[desc_idx];

    state->counter = vq->counter++;

    barrier();

    state->inflight = 1;

    return 0;
}
295d043e2dbSXie Yongji 
/*
 * Record @desc_idx as last_batch_head in the queue's inflight log.
 * Always returns 0.
 */
static int vduse_queue_inflight_pre_put(VduseVirtq *vq, int desc_idx)
{
    VduseVirtqLogInflight *inflight_log = &vq->log->inflight;

    inflight_log->last_batch_head = desc_idx;

    return 0;
}
302d043e2dbSXie Yongji 
/*
 * Clear the inflight flag of descriptor @desc_idx in the log and then,
 * after a barrier, copy the queue's used_idx into the log.
 * Always returns 0.
 */
static int vduse_queue_inflight_post_put(VduseVirtq *vq, int desc_idx)
{
    VduseVirtqLogInflight *inflight_log = &vq->log->inflight;

    inflight_log->desc[desc_idx].inflight = 0;

    barrier();

    inflight_log->used_idx = vq->used_idx;

    return 0;
}
313d043e2dbSXie Yongji 
/*
 * Unmap and free every occupied region slot that lies entirely inside
 * the IOVA range [@start, @last].  Does nothing when @start == @last.
 */
static void vduse_iova_remove_region(VduseDev *dev, uint64_t start,
                                     uint64_t last)
{
    VduseIovaRegion *r;
    VduseIovaRegion *const table_end = dev->regions + MAX_IOVA_REGIONS;

    if (start == last) {
        return;
    }

    for (r = dev->regions; r != table_end; r++) {
        bool fully_covered;

        if (r->mmap_addr == 0) {
            /* free slot */
            continue;
        }

        fully_covered = start <= r->iova &&
                        last >= (r->iova + r->size - 1);
        if (!fully_covered) {
            continue;
        }

        /* The mapping starts at file offset 0, hence offset + size bytes */
        munmap((void *)(uintptr_t)r->mmap_addr, r->mmap_offset + r->size);
        r->mmap_addr = 0;
        dev->num_regions--;
    }
}
337a6caeee8SXie Yongji 
/*
 * Map @fd and record it as the region covering IOVA [@start, @last].
 *
 * The mapping begins at file offset 0 and spans @offset + size bytes;
 * @offset is remembered so lookups can skip to the region's data.
 * @fd is closed on every path.
 *
 * Returns 0 on success, -ENOSPC when the region table has no free slot
 * and -EINVAL when mmap() fails.
 */
static int vduse_iova_add_region(VduseDev *dev, int fd,
                                 uint64_t offset, uint64_t start,
                                 uint64_t last, int prot)
{
    int i;
    uint64_t size = last - start + 1;
    VduseIovaRegion *slot = NULL;
    void *mmap_addr;

    /*
     * Look for a free slot before mapping anything.  A full table used
     * to be caught only by assert(); with assertions compiled out the
     * mapping would leak while success was still reported.
     */
    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        if (!dev->regions[i].mmap_addr) {
            slot = &dev->regions[i];
            break;
        }
    }
    if (!slot) {
        close(fd);
        return -ENOSPC;
    }

    mmap_addr = mmap(0, size + offset, prot, MAP_SHARED, fd, 0);
    /* The mapping, if any, stays valid after the descriptor is closed */
    close(fd);
    if (mmap_addr == MAP_FAILED) {
        return -EINVAL;
    }

    slot->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
    slot->mmap_offset = offset;
    slot->iova = start;
    slot->size = size;
    dev->num_regions++;

    return 0;
}
366a6caeee8SXie Yongji 
perm_to_prot(uint8_t perm)367a6caeee8SXie Yongji static int perm_to_prot(uint8_t perm)
368a6caeee8SXie Yongji {
369a6caeee8SXie Yongji     int prot = 0;
370a6caeee8SXie Yongji 
371a6caeee8SXie Yongji     switch (perm) {
372a6caeee8SXie Yongji     case VDUSE_ACCESS_WO:
373a6caeee8SXie Yongji         prot |= PROT_WRITE;
374a6caeee8SXie Yongji         break;
375a6caeee8SXie Yongji     case VDUSE_ACCESS_RO:
376a6caeee8SXie Yongji         prot |= PROT_READ;
377a6caeee8SXie Yongji         break;
378a6caeee8SXie Yongji     case VDUSE_ACCESS_RW:
379a6caeee8SXie Yongji         prot |= PROT_READ | PROT_WRITE;
380a6caeee8SXie Yongji         break;
381a6caeee8SXie Yongji     default:
382a6caeee8SXie Yongji         break;
383a6caeee8SXie Yongji     }
384a6caeee8SXie Yongji 
385a6caeee8SXie Yongji     return prot;
386a6caeee8SXie Yongji }
387a6caeee8SXie Yongji 
/*
 * Translate @iova into a pointer inside one of the device's mappings.
 *
 * On entry *@plen is the wanted length; if the matching region ends
 * sooner, *@plen is reduced to the bytes available up to its end.
 *
 * When no known region contains @iova, the device is queried with
 * VDUSE_IOTLB_GET_FD, the returned fd is added as a new region and the
 * lookup is retried.  Returns NULL if the ioctl or the mapping fails.
 */
static inline void *iova_to_va(VduseDev *dev, uint64_t *plen, uint64_t iova)
{
    struct vduse_iotlb_entry entry;
    int slot, fd;

    for (slot = 0; slot < MAX_IOVA_REGIONS; slot++) {
        VduseIovaRegion *r = &dev->regions[slot];
        uint64_t region_end;

        if (!r->mmap_addr) {
            continue;
        }

        region_end = r->iova + r->size;
        if (iova < r->iova || iova >= region_end) {
            continue;
        }

        /* Clamp the request to what this region can supply */
        if ((iova + *plen) > region_end) {
            *plen = region_end - iova;
        }
        return (void *)(uintptr_t)(iova - r->iova +
               r->mmap_addr + r->mmap_offset);
    }

    /* Not mapped yet: ask for the fd backing this address */
    entry.start = iova;
    entry.last = iova + 1;
    fd = ioctl(dev->fd, VDUSE_IOTLB_GET_FD, &entry);
    if (fd < 0) {
        return NULL;
    }

    if (vduse_iova_add_region(dev, fd, entry.offset, entry.start,
                              entry.last, perm_to_prot(entry.perm)) != 0) {
        return NULL;
    }

    /* The region is in the table now, so look it up again */
    return iova_to_va(dev, plen, iova);
}
423a6caeee8SXie Yongji 
/* Read the avail ring's flags field, converted from little-endian. */
static inline uint16_t vring_avail_flags(VduseVirtq *vq)
{
    uint16_t flags = le16toh(vq->vring.avail->flags);

    return flags;
}
428a6caeee8SXie Yongji 
/*
 * Read the avail ring's idx field (converted from little-endian), cache
 * it in vq->shadow_avail_idx and return it.
 */
static inline uint16_t vring_avail_idx(VduseVirtq *vq)
{
    uint16_t fresh_idx = le16toh(vq->vring.avail->idx);

    vq->shadow_avail_idx = fresh_idx;

    return fresh_idx;
}
435a6caeee8SXie Yongji 
/* Read entry @i of the avail ring, converted from little-endian. */
static inline uint16_t vring_avail_ring(VduseVirtq *vq, int i)
{
    uint16_t entry = le16toh(vq->vring.avail->ring[i]);

    return entry;
}
440a6caeee8SXie Yongji 
/*
 * Read the used-event value, which is fetched from the avail ring slot
 * at index vring.num (one past the last regular entry).
 */
static inline uint16_t vring_get_used_event(VduseVirtq *vq)
{
    unsigned int event_slot = vq->vring.num;

    return vring_avail_ring(vq, event_slot);
}
445a6caeee8SXie Yongji 
/*
 * Fetch the descriptor head stored at avail index @idx (taken modulo
 * the ring size) into *@head.
 *
 * Returns true when the head is a valid descriptor number; otherwise
 * prints a message to stderr and returns false.  *@head is written in
 * both cases.
 */
static bool vduse_queue_get_head(VduseVirtq *vq, unsigned int idx,
                                 unsigned int *head)
{
    unsigned int ring_size = vq->vring.num;

    /* Pick up the descriptor number advertised in this avail slot. */
    *head = vring_avail_ring(vq, idx % ring_size);

    if (*head < vq->vring.num) {
        return true;
    }

    /* A head beyond the ring cannot be processed. */
    fprintf(stderr, "Guest says index %u is available\n", *head);
    return false;
}
463a6caeee8SXie Yongji 
464a6caeee8SXie Yongji static int
vduse_queue_read_indirect_desc(VduseDev * dev,struct vring_desc * desc,uint64_t addr,size_t len)465a6caeee8SXie Yongji vduse_queue_read_indirect_desc(VduseDev *dev, struct vring_desc *desc,
466a6caeee8SXie Yongji                                uint64_t addr, size_t len)
467a6caeee8SXie Yongji {
468a6caeee8SXie Yongji     struct vring_desc *ori_desc;
469a6caeee8SXie Yongji     uint64_t read_len;
470a6caeee8SXie Yongji 
471a6caeee8SXie Yongji     if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
472a6caeee8SXie Yongji         return -1;
473a6caeee8SXie Yongji     }
474a6caeee8SXie Yongji 
475a6caeee8SXie Yongji     if (len == 0) {
476a6caeee8SXie Yongji         return -1;
477a6caeee8SXie Yongji     }
478a6caeee8SXie Yongji 
479a6caeee8SXie Yongji     while (len) {
480a6caeee8SXie Yongji         read_len = len;
481a6caeee8SXie Yongji         ori_desc = iova_to_va(dev, &read_len, addr);
482a6caeee8SXie Yongji         if (!ori_desc) {
483a6caeee8SXie Yongji             return -1;
484a6caeee8SXie Yongji         }
485a6caeee8SXie Yongji 
486a6caeee8SXie Yongji         memcpy(desc, ori_desc, read_len);
487a6caeee8SXie Yongji         len -= read_len;
488a6caeee8SXie Yongji         addr += read_len;
489a6caeee8SXie Yongji         desc += read_len;
490a6caeee8SXie Yongji     }
491a6caeee8SXie Yongji 
492a6caeee8SXie Yongji     return 0;
493a6caeee8SXie Yongji }
494a6caeee8SXie Yongji 
/* Result codes of vduse_queue_read_next_desc(). */
enum {
    /* the chain points outside the descriptor table */
    VIRTQUEUE_READ_DESC_ERROR = -1,
    /* end of chain */
    VIRTQUEUE_READ_DESC_DONE = 0,
    /* more buffers in chain */
    VIRTQUEUE_READ_DESC_MORE = 1,
};
500a6caeee8SXie Yongji 
vduse_queue_read_next_desc(struct vring_desc * desc,int i,unsigned int max,unsigned int * next)501a6caeee8SXie Yongji static int vduse_queue_read_next_desc(struct vring_desc *desc, int i,
502a6caeee8SXie Yongji                                       unsigned int max, unsigned int *next)
503a6caeee8SXie Yongji {
504a6caeee8SXie Yongji     /* If this descriptor says it doesn't chain, we're done. */
505a6caeee8SXie Yongji     if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
506a6caeee8SXie Yongji         return VIRTQUEUE_READ_DESC_DONE;
507a6caeee8SXie Yongji     }
508a6caeee8SXie Yongji 
509a6caeee8SXie Yongji     /* Check they're not leading us off end of descriptors. */
510a6caeee8SXie Yongji     *next = desc[i].next;
511a6caeee8SXie Yongji     /* Make sure compiler knows to grab that: we don't want it changing! */
512a6caeee8SXie Yongji     smp_wmb();
513a6caeee8SXie Yongji 
514a6caeee8SXie Yongji     if (*next >= max) {
515a6caeee8SXie Yongji         fprintf(stderr, "Desc next is %u\n", *next);
516a6caeee8SXie Yongji         return VIRTQUEUE_READ_DESC_ERROR;
517a6caeee8SXie Yongji     }
518a6caeee8SXie Yongji 
519a6caeee8SXie Yongji     return VIRTQUEUE_READ_DESC_MORE;
520a6caeee8SXie Yongji }
521a6caeee8SXie Yongji 
522a6caeee8SXie Yongji /*
523a6caeee8SXie Yongji  * Fetch avail_idx from VQ memory only when we really need to know if
524a6caeee8SXie Yongji  * guest has added some buffers.
525a6caeee8SXie Yongji  */
vduse_queue_empty(VduseVirtq * vq)526a6caeee8SXie Yongji static bool vduse_queue_empty(VduseVirtq *vq)
527a6caeee8SXie Yongji {
528a6caeee8SXie Yongji     if (unlikely(!vq->vring.avail)) {
529a6caeee8SXie Yongji         return true;
530a6caeee8SXie Yongji     }
531a6caeee8SXie Yongji 
532a6caeee8SXie Yongji     if (vq->shadow_avail_idx != vq->last_avail_idx) {
533a6caeee8SXie Yongji         return false;
534a6caeee8SXie Yongji     }
535a6caeee8SXie Yongji 
536a6caeee8SXie Yongji     return vring_avail_idx(vq) == vq->last_avail_idx;
537a6caeee8SXie Yongji }
538a6caeee8SXie Yongji 
/*
 * Decide whether an interrupt should be raised for @vq.
 *
 * - With NOTIFY_ON_EMPTY: always, when nothing is in use and the queue
 *   is empty.
 * - Without EVENT_IDX: unless the avail flags carry
 *   VRING_AVAIL_F_NO_INTERRUPT.
 * - With EVENT_IDX: on the first call, or when vring_need_event() says
 *   the used event was crossed since the previous call.
 *
 * In the EVENT_IDX case the signalled_used bookkeeping is updated.
 */
static bool vduse_queue_should_notify(VduseVirtq *vq)
{
    VduseDev *dev = vq->dev;
    uint16_t prev_used, cur_used;
    bool had_valid;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vduse_dev_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vduse_queue_empty(vq)) {
        return true;
    }

    if (!vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        if (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) {
            return false;
        }
        return true;
    }

    /* Remember the old bookkeeping, then record the current used index */
    had_valid = vq->signalled_used_valid;
    prev_used = vq->signalled_used;
    cur_used = vq->used_idx;
    vq->signalled_used_valid = true;
    vq->signalled_used = cur_used;

    if (!had_valid) {
        return true;
    }
    return vring_need_event(vring_get_used_event(vq), cur_used, prev_used);
}
564a6caeee8SXie Yongji 
/*
 * Inject an interrupt for @vq if one is due.
 *
 * Nothing happens when the avail ring is not mapped or when
 * vduse_queue_should_notify() says no.  A failed injection is reported
 * on stderr; no error is returned to the caller.
 */
void vduse_queue_notify(VduseVirtq *vq)
{
    VduseDev *dev = vq->dev;

    if (unlikely(!vq->vring.avail) || !vduse_queue_should_notify(vq)) {
        return;
    }

    if (vduse_inject_irq(dev, vq->index) >= 0) {
        return;
    }

    fprintf(stderr, "Error inject irq for vq %d: %s\n",
            vq->index, strerror(errno));
}
582a6caeee8SXie Yongji 
/*
 * Store @val, converted to little-endian, as the avail event.  It is
 * written at the position of used ring element vring.num, i.e. just
 * past the last regular entry, using memcpy().
 */
static inline void vring_set_avail_event(VduseVirtq *vq, uint16_t val)
{
    uint16_t wire_val = htole16(val);
    void *event_pos = &vq->vring.used->ring[vq->vring.num];

    memcpy(event_pos, &wire_val, sizeof(uint16_t));
}
588a6caeee8SXie Yongji 
/*
 * Translate the buffer [@pa, @pa + @sz) into iovec entries appended to
 * @iov, starting at index *@p_num_sg.
 *
 * A buffer crossing region boundaries takes one entry per piece.  At
 * most @max_num_sg entries may be in use in total.
 *
 * Returns true and updates *@p_num_sg on success.  Returns false, with
 * *@p_num_sg untouched and a message on stderr, when @sz is zero, when
 * the entry limit is hit or when an address cannot be translated.
 *
 * NOTE(review): @is_write is not used by this function.
 */
static bool vduse_queue_map_single_desc(VduseVirtq *vq, unsigned int *p_num_sg,
                                   struct iovec *iov, unsigned int max_num_sg,
                                   bool is_write, uint64_t pa, size_t sz)
{
    VduseDev *dev = vq->dev;
    unsigned filled = *p_num_sg;

    assert(filled <= max_num_sg);

    if (sz == 0) {
        fprintf(stderr, "virtio: zero sized buffers are not allowed\n");
        return false;
    }

    while (sz != 0) {
        struct iovec *seg;
        uint64_t chunk = sz;

        if (filled == max_num_sg) {
            fprintf(stderr,
                    "virtio: too many descriptors in indirect table\n");
            return false;
        }

        /* iova_to_va() may shrink 'chunk' to what the region can supply */
        seg = &iov[filled];
        seg->iov_base = iova_to_va(dev, &chunk, pa);
        if (seg->iov_base == NULL) {
            fprintf(stderr, "virtio: invalid address for buffers\n");
            return false;
        }
        seg->iov_len = chunk;
        filled++;

        sz -= chunk;
        pa += chunk;
    }

    *p_num_sg = filled;
    return true;
}
625a6caeee8SXie Yongji 
vduse_queue_alloc_element(size_t sz,unsigned out_num,unsigned in_num)626a6caeee8SXie Yongji static void *vduse_queue_alloc_element(size_t sz, unsigned out_num,
627a6caeee8SXie Yongji                                        unsigned in_num)
628a6caeee8SXie Yongji {
629a6caeee8SXie Yongji     VduseVirtqElement *elem;
630a6caeee8SXie Yongji     size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
631a6caeee8SXie Yongji     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
632a6caeee8SXie Yongji     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
633a6caeee8SXie Yongji 
634a6caeee8SXie Yongji     assert(sz >= sizeof(VduseVirtqElement));
635a6caeee8SXie Yongji     elem = malloc(out_sg_end);
636a6caeee8SXie Yongji     if (!elem) {
637a6caeee8SXie Yongji         return NULL;
638a6caeee8SXie Yongji     }
639a6caeee8SXie Yongji     elem->out_num = out_num;
640a6caeee8SXie Yongji     elem->in_num = in_num;
641a6caeee8SXie Yongji     elem->in_sg = (void *)elem + in_sg_ofs;
642a6caeee8SXie Yongji     elem->out_sg = (void *)elem + out_sg_ofs;
643a6caeee8SXie Yongji     return elem;
644a6caeee8SXie Yongji }
645a6caeee8SXie Yongji 
/*
 * Translate the descriptor chain headed by @idx into a newly allocated
 * element of @sz bytes (see vduse_queue_alloc_element()).
 *
 * If the head descriptor carries VRING_DESC_F_INDIRECT, the chain is walked
 * inside the table it points to.  The table is used in place when
 * iova_to_va() maps all of it in one piece; otherwise it is copied into a
 * local buffer with vduse_queue_read_indirect_desc().  A table whose length
 * is zero or not a multiple of sizeof(struct vring_desc) is rejected before
 * any of its entries is read.
 *
 * Descriptors with VRING_DESC_F_WRITE are collected into in_sg, all others
 * into out_sg; a descriptor without the flag that follows one with it is an
 * error.
 *
 * Returns the element with its index set to @idx, or NULL (after printing a
 * message to stderr) on any failure.
 */
static void *vduse_queue_map_desc(VduseVirtq *vq, unsigned int idx, size_t sz)
{
    struct vring_desc *desc = vq->vring.desc;
    VduseDev *dev = vq->dev;
    uint64_t desc_addr, read_len;
    unsigned int desc_len;
    unsigned int max = vq->vring.num;
    unsigned int i = idx;
    VduseVirtqElement *elem;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    unsigned int out_num = 0, in_num = 0;
    int rc;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        /*
         * Read the length once: the descriptor lives in memory shared with
         * the driver, so the validated value must be the one that is used.
         */
        desc_len = le32toh(desc[i].len);
        if (!desc_len || desc_len % sizeof(struct vring_desc)) {
            fprintf(stderr, "Invalid size for indirect buffer table\n");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = iova_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            /* A zero return means the table was copied into desc_buf. */
            if (!vduse_queue_read_indirect_desc(dev, desc_buf,
                                                desc_addr,
                                                desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            fprintf(stderr, "Invalid indirect buffer table\n");
            return NULL;
        }
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            /* in_sg entries are staged right after the out_sg entries. */
            if (!vduse_queue_map_single_desc(vq, &in_num, iov + out_num,
                                             VIRTQUEUE_MAX_SIZE - out_num,
                                             true, le64toh(desc[i].addr),
                                             le32toh(desc[i].len))) {
                return NULL;
            }
        } else {
            if (in_num) {
                fprintf(stderr, "Incorrect order for descriptors\n");
                return NULL;
            }
            if (!vduse_queue_map_single_desc(vq, &out_num, iov,
                                             VIRTQUEUE_MAX_SIZE, false,
                                             le64toh(desc[i].addr),
                                             le32toh(desc[i].len))) {
                return NULL;
            }
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            fprintf(stderr, "Looped descriptor\n");
            return NULL;
        }
        rc = vduse_queue_read_next_desc(desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        fprintf(stderr, "read descriptor error\n");
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = vduse_queue_alloc_element(sz, out_num, in_num);
    if (!elem) {
        fprintf(stderr, "Failed to allocate virtqueue element\n");
        return NULL;
    }
    elem->index = idx;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    return elem;
}
739a6caeee8SXie Yongji 
/*
 * Fetch the next request from @vq as an element of @sz bytes.
 *
 * Entries on the queue's resubmit list are handed out first, last entry
 * first; the list is freed once it has been drained.  Otherwise the next
 * head is taken from the available ring, the avail event is updated when
 * VIRTIO_RING_F_EVENT_IDX is negotiated, and the chain is mapped.
 *
 * Returns the mapped element, or NULL when the queue has no available ring,
 * is empty, already has vring.num elements in use, or mapping fails.
 */
void *vduse_queue_pop(VduseVirtq *vq, size_t sz)
{
    VduseDev *dev = vq->dev;
    VduseVirtqElement *elem;
    unsigned int head;
    int last;

    if (unlikely(!vq->vring.avail)) {
        return NULL;
    }

    if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
        last = --vq->resubmit_num;
        elem = vduse_queue_map_desc(vq, vq->resubmit_list[last].index, sz);

        if (vq->resubmit_num == 0) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        return elem;
    }

    if (vduse_queue_empty(vq)) {
        return NULL;
    }
    /* Read barrier required after the emptiness check above. */
    smp_rmb();

    if (vq->inuse >= vq->vring.num) {
        fprintf(stderr, "Virtqueue size exceeded: %d\n", vq->inuse);
        return NULL;
    }

    /* last_avail_idx advances even if fetching the head fails. */
    if (!vduse_queue_get_head(vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    elem = vduse_queue_map_desc(vq, head, sz);
    if (elem) {
        vq->inuse++;
        vduse_queue_inflight_get(vq, head);
    }

    return elem;
}
794a6caeee8SXie Yongji 
/* Copy *@uelem into slot @i of the used ring of @vq. */
static inline void vring_used_write(VduseVirtq *vq,
                                    struct vring_used_elem *uelem, int i)
{
    vq->vring.used->ring[i] = *uelem;
}
802a6caeee8SXie Yongji 
/*
 * Write a used-ring entry for @elem reporting @len bytes.
 *
 * The entry goes into the slot @idx positions past the queue's current
 * used_idx, wrapped to the ring size.  The used index itself is not touched
 * here.  Does nothing when the queue has no used ring.
 */
static void vduse_queue_fill(VduseVirtq *vq, const VduseVirtqElement *elem,
                             unsigned int len, unsigned int idx)
{
    struct vring_used_elem entry;
    unsigned int slot;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    slot = (idx + vq->used_idx) % vq->vring.num;

    entry.id = htole32(elem->index);
    entry.len = htole32(len);
    vring_used_write(vq, &entry, slot);
}
818a6caeee8SXie Yongji 
/*
 * Store @val as the used index: little-endian in the used ring, and in host
 * order in the queue's own used_idx field.
 */
static inline void vring_used_idx_set(VduseVirtq *vq, uint16_t val)
{
    struct vring_used *used = vq->vring.used;

    used->idx = htole16(val);
    vq->used_idx = val;
}
824a6caeee8SXie Yongji 
/*
 * Publish @count previously filled used-ring entries by advancing the used
 * index, and drop them from the in-use count.
 *
 * signalled_used_valid is cleared when the 16-bit index arithmetic below
 * shows that signalled_used no longer lies within the window just published.
 * Does nothing when the queue has no used ring.
 */
static void vduse_queue_flush(VduseVirtq *vq, unsigned int count)
{
    uint16_t prev_idx, next_idx;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Ring entries must be written before the index that exposes them. */
    smp_wmb();

    prev_idx = vq->used_idx;
    next_idx = prev_idx + count;
    vring_used_idx_set(vq, next_idx);
    vq->inuse -= count;

    if (unlikely((int16_t)(next_idx - vq->signalled_used) <
                 (uint16_t)(next_idx - prev_idx))) {
        vq->signalled_used_valid = false;
    }
}
844a6caeee8SXie Yongji 
/*
 * Complete @elem on @vq, reporting @len bytes as used.
 *
 * The used-ring entry is written first; the used-index update is then
 * bracketed by the inflight pre-put and post-put hooks for the element's
 * descriptor index.
 */
void vduse_queue_push(VduseVirtq *vq, const VduseVirtqElement *elem,
                      unsigned int len)
{
    /* Stage the entry in the slot at the current used index. */
    vduse_queue_fill(vq, elem, len, 0);

    vduse_queue_inflight_pre_put(vq, elem->index);
    /* Expose exactly one new entry. */
    vduse_queue_flush(vq, 1);
    vduse_queue_inflight_post_put(vq, elem->index);
}
853a6caeee8SXie Yongji 
/*
 * Re-translate the three ring addresses of @vq and store the resulting
 * pointers in vq->vring.desc/avail/used.
 *
 * Each translation asks iova_to_va() for sizeof() the corresponding ring
 * structure.  If the length handed back differs from the request, -EINVAL is
 * returned immediately; pointers stored up to that point are left as they
 * are.
 *
 * Returns 0 on success, or -EINVAL on a length mismatch or if any of the
 * three pointers ended up NULL.
 */
static int vduse_queue_update_vring(VduseVirtq *vq, uint64_t desc_addr,
                                    uint64_t avail_addr, uint64_t used_addr)
{
    struct VduseDev *dev = vq->dev;
    uint64_t span;

    span = sizeof(struct vring_desc);
    vq->vring.desc = iova_to_va(dev, &span, desc_addr);
    if (span != sizeof(struct vring_desc)) {
        return -EINVAL;
    }

    span = sizeof(struct vring_avail);
    vq->vring.avail = iova_to_va(dev, &span, avail_addr);
    if (span != sizeof(struct vring_avail)) {
        return -EINVAL;
    }

    span = sizeof(struct vring_used);
    vq->vring.used = iova_to_va(dev, &span, used_addr);
    if (span != sizeof(struct vring_used)) {
        return -EINVAL;
    }

    if (vq->vring.desc == NULL || vq->vring.avail == NULL ||
        vq->vring.used == NULL) {
        fprintf(stderr, "Failed to get vq[%d] iova mapping\n", vq->index);
        return -EINVAL;
    }

    return 0;
}
885a6caeee8SXie Yongji 
/*
 * Bring @vq into service if the kernel reports it as ready.
 *
 * Steps: query the queue with VDUSE_VQ_GET_INFO, record its size and ring
 * addresses, map the rings, create a non-blocking close-on-exec eventfd and
 * register it as the kick fd, run the inflight check, and finally invoke the
 * enable_queue callback.
 *
 * Every failure is reported on stderr and the function simply returns.  If
 * the inflight check fails after the kick fd has been installed, the fd is
 * deassigned and closed and the queue is marked not ready with fd -1 again,
 * so that a later vduse_queue_disable() neither invokes disable_queue for a
 * queue that was never enabled nor closes the same descriptor twice.
 */
static void vduse_queue_enable(VduseVirtq *vq)
{
    struct VduseDev *dev = vq->dev;
    struct vduse_vq_info vq_info;
    struct vduse_vq_eventfd vq_eventfd;
    int fd;

    vq_info.index = vq->index;
    if (ioctl(dev->fd, VDUSE_VQ_GET_INFO, &vq_info)) {
        fprintf(stderr, "Failed to get vq[%d] info: %s\n",
                vq->index, strerror(errno));
        return;
    }

    if (!vq_info.ready) {
        return;
    }

    vq->vring.num = vq_info.num;
    vq->vring.desc_addr = vq_info.desc_addr;
    vq->vring.avail_addr = vq_info.driver_addr;
    vq->vring.used_addr = vq_info.device_addr;

    if (vduse_queue_update_vring(vq, vq_info.desc_addr,
                                 vq_info.driver_addr, vq_info.device_addr)) {
        fprintf(stderr, "Failed to update vring for vq[%d]\n", vq->index);
        return;
    }

    fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (fd < 0) {
        fprintf(stderr, "Failed to init eventfd for vq[%d]\n", vq->index);
        return;
    }

    vq_eventfd.index = vq->index;
    vq_eventfd.fd = fd;
    if (ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &vq_eventfd)) {
        fprintf(stderr, "Failed to setup kick fd for vq[%d]\n", vq->index);
        close(fd);
        return;
    }

    vq->fd = fd;
    vq->signalled_used_valid = false;
    vq->ready = true;

    if (vduse_queue_check_inflights(vq)) {
        fprintf(stderr, "Failed to check inflights for vq[%d]\n", vq->index);
        /* Best-effort rollback, mirroring vduse_queue_disable(). */
        vq_eventfd.fd = VDUSE_EVENTFD_DEASSIGN;
        ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &vq_eventfd);
        close(fd);
        vq->fd = -1;
        vq->ready = false;
        return;
    }

    dev->ops->enable_queue(dev, vq);
}
941a6caeee8SXie Yongji 
/*
 * Take @vq out of service; a queue that is not ready is left untouched.
 *
 * The disable_queue callback runs first, then the kick fd is deassigned and
 * closed, and the ring bookkeeping is cleared.  The queue must have no
 * elements in use at this point (asserted).
 */
static void vduse_queue_disable(VduseVirtq *vq)
{
    struct VduseDev *dev = vq->dev;
    struct vduse_vq_eventfd kick;

    if (!vq->ready) {
        return;
    }

    dev->ops->disable_queue(dev, vq);

    /* Result of the deassign ioctl is intentionally not checked. */
    kick.index = vq->index;
    kick.fd = VDUSE_EVENTFD_DEASSIGN;
    ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &kick);
    close(vq->fd);

    assert(vq->inuse == 0);

    vq->vring.num = 0;
    vq->vring.desc_addr = 0;
    vq->vring.avail_addr = 0;
    vq->vring.used_addr = 0;
    vq->vring.desc = NULL;
    vq->vring.avail = NULL;
    vq->vring.used = NULL;
    vq->ready = false;
    vq->fd = -1;
}
970a6caeee8SXie Yongji 
/*
 * Refresh dev->features from the kernel and try to enable every queue.
 *
 * If the feature query fails the error is printed and no queue is touched.
 * VIRTIO_F_VERSION_1 must be present in the retrieved features (asserted).
 */
static void vduse_dev_start_dataplane(VduseDev *dev)
{
    int q;

    if (ioctl(dev->fd, VDUSE_DEV_GET_FEATURES, &dev->features) != 0) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        return;
    }
    assert(vduse_dev_has_feature(dev, VIRTIO_F_VERSION_1));

    for (q = 0; q < dev->num_queues; q++) {
        vduse_queue_enable(dev->vqs + q);
    }
}
985a6caeee8SXie Yongji 
/*
 * Disable every queue, zero the reconnect log if one is attached, clear the
 * cached feature bits and drop all cached IOVA mappings.
 */
static void vduse_dev_stop_dataplane(VduseDev *dev)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    int i;

    for (i = 0; i < dev->num_queues; i++) {
        vduse_queue_disable(&dev->vqs[i]);
    }
    if (dev->log) {
        memset(dev->log, 0, log_size);
    }
    dev->features = 0;
    /*
     * Use the full 64-bit range as the upper bound: ULONG_MAX is only 32 bits
     * wide on ILP32 targets and would leave higher regions cached.
     * NOTE(review): assumes vduse_iova_remove_region() takes 64-bit bounds -
     * confirm against its definition.
     */
    vduse_iova_remove_region(dev, 0, UINT64_MAX);
}
1000a6caeee8SXie Yongji 
/*
 * Read one request from the device fd, handle it and write the response.
 *
 * Handled request types:
 *   VDUSE_GET_VQ_STATE - reply with the queue's last_avail_idx; an index
 *                        outside dev->num_queues yields a FAILED result.
 *   VDUSE_SET_STATUS   - start the dataplane when DRIVER_OK is set, stop it
 *                        when the status is 0.
 *   VDUSE_UPDATE_IOTLB - drop the affected cached range and re-map the rings
 *                        of every ready queue.
 * Any other type is answered with VDUSE_REQ_RESULT_FAILED.
 *
 * Returns 0 on success or a negative errno value.  A short read or write,
 * which does not set errno, is reported as -EIO.
 */
int vduse_dev_handler(VduseDev *dev)
{
    struct vduse_dev_request req;
    struct vduse_dev_response resp = { 0 };
    VduseVirtq *vq;
    int i, ret, err;

    ret = read(dev->fd, &req, sizeof(req));
    if (ret != sizeof(req)) {
        /* Capture errno before fprintf() gets a chance to change it. */
        err = ret < 0 ? errno : EIO;
        fprintf(stderr, "Read request error [%d]: %s\n",
                ret, strerror(err));
        return -err;
    }
    resp.request_id = req.request_id;

    switch (req.type) {
    case VDUSE_GET_VQ_STATE:
        /* Never index dev->vqs with an unchecked value from the request. */
        if (req.vq_state.index >= dev->num_queues) {
            resp.result = VDUSE_REQ_RESULT_FAILED;
            break;
        }
        vq = &dev->vqs[req.vq_state.index];
        resp.vq_state.split.avail_index = vq->last_avail_idx;
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    case VDUSE_SET_STATUS:
        if (req.s.status & VIRTIO_CONFIG_S_DRIVER_OK) {
            vduse_dev_start_dataplane(dev);
        } else if (req.s.status == 0) {
            vduse_dev_stop_dataplane(dev);
        }
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    case VDUSE_UPDATE_IOTLB:
        /* The iova will be updated by iova_to_va() later, so just remove it */
        vduse_iova_remove_region(dev, req.iova.start, req.iova.last);
        for (i = 0; i < dev->num_queues; i++) {
            vq = &dev->vqs[i];
            if (vq->ready) {
                if (vduse_queue_update_vring(vq, vq->vring.desc_addr,
                                             vq->vring.avail_addr,
                                             vq->vring.used_addr)) {
                    fprintf(stderr, "Failed to update vring for vq[%d]\n",
                            vq->index);
                }
            }
        }
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    default:
        resp.result = VDUSE_REQ_RESULT_FAILED;
        break;
    }

    ret = write(dev->fd, &resp, sizeof(resp));
    if (ret != sizeof(resp)) {
        err = ret < 0 ? errno : EIO;
        fprintf(stderr, "Write request %d error [%d]: %s\n",
                req.type, ret, strerror(err));
        return -err;
    }
    return 0;
}
1059a6caeee8SXie Yongji 
/*
 * Update @size bytes of the device config space at @offset from @buffer and
 * then inject a config-change interrupt.
 *
 * Returns 0 on success, -ENOMEM if the temporary request cannot be
 * allocated, or the negative errno of the ioctl that failed.
 */
int vduse_dev_update_config(VduseDev *dev, uint32_t size,
                            uint32_t offset, char *buffer)
{
    int ret, err;
    struct vduse_config_data *data;

    data = malloc(offsetof(struct vduse_config_data, buffer) + size);
    if (!data) {
        return -ENOMEM;
    }

    data->offset = offset;
    data->length = size;
    memcpy(data->buffer, buffer, size);

    ret = ioctl(dev->fd, VDUSE_DEV_SET_CONFIG, data);
    /* Save errno first: free() is allowed to modify it. */
    err = errno;
    free(data);

    if (ret) {
        return -err;
    }

    if (ioctl(dev->fd, VDUSE_DEV_INJECT_CONFIG_IRQ)) {
        return -errno;
    }

    return 0;
}
1088a6caeee8SXie Yongji 
/*
 * Configure queue @index of @dev with a maximum size of @max_size and then
 * attempt to enable it.
 *
 * Returns 0 on success, -EINVAL if @index is outside [0, dev->num_queues) or
 * @max_size is negative or larger than VIRTQUEUE_MAX_SIZE, or the negative
 * errno of a failed VDUSE_VQ_SETUP ioctl.  vduse_queue_enable() reports its
 * own problems on stderr; they do not affect the return value.
 */
int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size)
{
    VduseVirtq *vq;
    struct vduse_vq_config vq_config = { 0 };

    /* Validate before forming a pointer into dev->vqs. */
    if (index < 0 || index >= dev->num_queues) {
        return -EINVAL;
    }

    if (max_size < 0 || max_size > VIRTQUEUE_MAX_SIZE) {
        return -EINVAL;
    }

    vq = &dev->vqs[index];

    vq_config.index = vq->index;
    vq_config.max_size = max_size;

    if (ioctl(dev->fd, VDUSE_VQ_SETUP, &vq_config)) {
        return -errno;
    }

    vduse_queue_enable(vq);

    return 0;
}
1109d043e2dbSXie Yongji 
/*
 * Attach the reconnect log obtained from vduse_log_get(@filename, ...) to
 * @dev and hand each queue its own slice of it.
 *
 * The log is sized for dev->num_queues slices of
 * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE) bytes; each slice's inflight.desc_num
 * is set to VIRTQUEUE_MAX_SIZE.
 *
 * Returns 0 on success.  If the log cannot be obtained, -EINVAL is returned
 * and dev->log is left unchanged, so it never holds MAP_FAILED (which other
 * code testing dev->log for non-NULL would take for a valid mapping).
 */
int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    void *log;
    int i;

    log = vduse_log_get(filename, log_size);
    if (log == MAP_FAILED) {
        fprintf(stderr, "Failed to get vduse log\n");
        return -EINVAL;
    }
    dev->log = log;

    for (i = 0; i < dev->num_queues; i++) {
        dev->vqs[i].log = log;
        dev->vqs[i].log->inflight.desc_num = VIRTQUEUE_MAX_SIZE;
        /* Advance to the next queue's slice. */
        log = (void *)((char *)log + vduse_vq_log_size(VIRTQUEUE_MAX_SIZE));
    }

    return 0;
}
1131a6caeee8SXie Yongji 
/*
 * Allocate a zero-initialized array of @num_queues queues for @dev.  Each
 * queue gets its index, a back pointer to @dev and fd -1.
 *
 * Returns 0 with dev->vqs set, or -ENOMEM with @dev untouched.
 */
static int vduse_dev_init_vqs(VduseDev *dev, uint16_t num_queues)
{
    VduseVirtq *vqs = calloc(num_queues, sizeof(*vqs));
    int q;

    if (vqs == NULL) {
        return -ENOMEM;
    }

    for (q = 0; q < num_queues; q++) {
        VduseVirtq *vq = &vqs[q];

        vq->index = q;
        vq->dev = dev;
        vq->fd = -1;
    }

    dev->vqs = vqs;
    return 0;
}
1151a6caeee8SXie Yongji 
/*
 * Open /dev/vduse/@name and initialize @dev around it: fetch the feature
 * bits, duplicate the name, allocate @num_queues queues and record @ops and
 * @priv.
 *
 * Returns 0 on success or a negative errno value; on failure the opened fd
 * and the duplicated name are released.  errno is captured right after the
 * failing call, since free(), fprintf() and close() may all change it.
 */
static int vduse_dev_init(VduseDev *dev, const char *name,
                          uint16_t num_queues, const VduseOps *ops,
                          void *priv)
{
    char *dev_path, *dev_name;
    size_t path_len;
    int ret, fd, err;

    path_len = strlen(name) + strlen("/dev/vduse/") + 1;
    dev_path = malloc(path_len);
    if (!dev_path) {
        return -ENOMEM;
    }
    snprintf(dev_path, path_len, "/dev/vduse/%s", name);

    fd = open(dev_path, O_RDWR);
    if (fd < 0) {
        err = errno;
        free(dev_path);
        fprintf(stderr, "Failed to open vduse dev %s: %s\n",
                name, strerror(err));
        return -err;
    }
    free(dev_path);

    if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        err = errno;
        fprintf(stderr, "Failed to get features: %s\n", strerror(err));
        close(fd);
        return -err;
    }

    dev_name = strdup(name);
    if (!dev_name) {
        close(fd);
        return -ENOMEM;
    }

    ret = vduse_dev_init_vqs(dev, num_queues);
    if (ret) {
        free(dev_name);
        close(fd);
        return ret;
    }

    dev->name = dev_name;
    dev->num_queues = num_queues;
    dev->fd = fd;
    dev->ops = ops;
    dev->priv = priv;

    return 0;
}
1200a6caeee8SXie Yongji 
vduse_name_is_invalid(const char * name)1201e7156ff7SXie Yongji static inline bool vduse_name_is_invalid(const char *name)
1202a6caeee8SXie Yongji {
1203a6caeee8SXie Yongji     return strlen(name) >= VDUSE_NAME_MAX || strstr(name, "..");
1204a6caeee8SXie Yongji }
1205a6caeee8SXie Yongji 
vduse_dev_create_by_fd(int fd,uint16_t num_queues,const VduseOps * ops,void * priv)1206a6caeee8SXie Yongji VduseDev *vduse_dev_create_by_fd(int fd, uint16_t num_queues,
1207a6caeee8SXie Yongji                                  const VduseOps *ops, void *priv)
1208a6caeee8SXie Yongji {
1209a6caeee8SXie Yongji     VduseDev *dev;
1210a6caeee8SXie Yongji     int ret;
1211a6caeee8SXie Yongji 
1212a6caeee8SXie Yongji     if (!ops || !ops->enable_queue || !ops->disable_queue) {
1213a6caeee8SXie Yongji         fprintf(stderr, "Invalid parameter for vduse\n");
1214a6caeee8SXie Yongji         return NULL;
1215a6caeee8SXie Yongji     }
1216a6caeee8SXie Yongji 
1217a6caeee8SXie Yongji     dev = calloc(sizeof(VduseDev), 1);
1218a6caeee8SXie Yongji     if (!dev) {
1219a6caeee8SXie Yongji         fprintf(stderr, "Failed to allocate vduse device\n");
1220a6caeee8SXie Yongji         return NULL;
1221a6caeee8SXie Yongji     }
1222a6caeee8SXie Yongji 
1223d043e2dbSXie Yongji     if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
1224d043e2dbSXie Yongji         fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
1225d043e2dbSXie Yongji         free(dev);
1226d043e2dbSXie Yongji         return NULL;
1227d043e2dbSXie Yongji     }
1228d043e2dbSXie Yongji 
1229a6caeee8SXie Yongji     ret = vduse_dev_init_vqs(dev, num_queues);
1230a6caeee8SXie Yongji     if (ret) {
1231a6caeee8SXie Yongji         fprintf(stderr, "Failed to init vqs\n");
1232a6caeee8SXie Yongji         free(dev);
1233a6caeee8SXie Yongji         return NULL;
1234a6caeee8SXie Yongji     }
1235a6caeee8SXie Yongji 
1236a6caeee8SXie Yongji     dev->num_queues = num_queues;
1237a6caeee8SXie Yongji     dev->fd = fd;
1238a6caeee8SXie Yongji     dev->ops = ops;
1239a6caeee8SXie Yongji     dev->priv = priv;
1240a6caeee8SXie Yongji 
1241a6caeee8SXie Yongji     return dev;
1242a6caeee8SXie Yongji }
1243a6caeee8SXie Yongji 
vduse_dev_create_by_name(const char * name,uint16_t num_queues,const VduseOps * ops,void * priv)1244a6caeee8SXie Yongji VduseDev *vduse_dev_create_by_name(const char *name, uint16_t num_queues,
1245a6caeee8SXie Yongji                                    const VduseOps *ops, void *priv)
1246a6caeee8SXie Yongji {
1247a6caeee8SXie Yongji     VduseDev *dev;
1248a6caeee8SXie Yongji     int ret;
1249a6caeee8SXie Yongji 
1250e7156ff7SXie Yongji     if (!name || vduse_name_is_invalid(name) || !ops ||
1251a6caeee8SXie Yongji         !ops->enable_queue || !ops->disable_queue) {
1252a6caeee8SXie Yongji         fprintf(stderr, "Invalid parameter for vduse\n");
1253a6caeee8SXie Yongji         return NULL;
1254a6caeee8SXie Yongji     }
1255a6caeee8SXie Yongji 
1256a6caeee8SXie Yongji     dev = calloc(sizeof(VduseDev), 1);
1257a6caeee8SXie Yongji     if (!dev) {
1258a6caeee8SXie Yongji         fprintf(stderr, "Failed to allocate vduse device\n");
1259a6caeee8SXie Yongji         return NULL;
1260a6caeee8SXie Yongji     }
1261a6caeee8SXie Yongji 
1262a6caeee8SXie Yongji     ret = vduse_dev_init(dev, name, num_queues, ops, priv);
1263a6caeee8SXie Yongji     if (ret < 0) {
1264a6caeee8SXie Yongji         fprintf(stderr, "Failed to init vduse device %s: %s\n",
1265630179b7SXie Yongji                 name, strerror(-ret));
1266a6caeee8SXie Yongji         free(dev);
1267a6caeee8SXie Yongji         return NULL;
1268a6caeee8SXie Yongji     }
1269a6caeee8SXie Yongji 
1270a6caeee8SXie Yongji     return dev;
1271a6caeee8SXie Yongji }
1272a6caeee8SXie Yongji 
vduse_dev_create(const char * name,uint32_t device_id,uint32_t vendor_id,uint64_t features,uint16_t num_queues,uint32_t config_size,char * config,const VduseOps * ops,void * priv)1273a6caeee8SXie Yongji VduseDev *vduse_dev_create(const char *name, uint32_t device_id,
1274a6caeee8SXie Yongji                            uint32_t vendor_id, uint64_t features,
1275a6caeee8SXie Yongji                            uint16_t num_queues, uint32_t config_size,
1276a6caeee8SXie Yongji                            char *config, const VduseOps *ops, void *priv)
1277a6caeee8SXie Yongji {
1278a6caeee8SXie Yongji     VduseDev *dev;
1279a6caeee8SXie Yongji     int ret, ctrl_fd;
1280a6caeee8SXie Yongji     uint64_t version;
1281a6caeee8SXie Yongji     struct vduse_dev_config *dev_config;
1282a6caeee8SXie Yongji     size_t size = offsetof(struct vduse_dev_config, config);
1283a6caeee8SXie Yongji 
1284e7156ff7SXie Yongji     if (!name || vduse_name_is_invalid(name) ||
1285a6caeee8SXie Yongji         !has_feature(features,  VIRTIO_F_VERSION_1) || !config ||
1286a6caeee8SXie Yongji         !config_size || !ops || !ops->enable_queue || !ops->disable_queue) {
1287a6caeee8SXie Yongji         fprintf(stderr, "Invalid parameter for vduse\n");
1288a6caeee8SXie Yongji         return NULL;
1289a6caeee8SXie Yongji     }
1290a6caeee8SXie Yongji 
1291a6caeee8SXie Yongji     dev = calloc(sizeof(VduseDev), 1);
1292a6caeee8SXie Yongji     if (!dev) {
1293a6caeee8SXie Yongji         fprintf(stderr, "Failed to allocate vduse device\n");
1294a6caeee8SXie Yongji         return NULL;
1295a6caeee8SXie Yongji     }
1296a6caeee8SXie Yongji 
1297a6caeee8SXie Yongji     ctrl_fd = open("/dev/vduse/control", O_RDWR);
1298a6caeee8SXie Yongji     if (ctrl_fd < 0) {
1299a6caeee8SXie Yongji         fprintf(stderr, "Failed to open /dev/vduse/control: %s\n",
1300a6caeee8SXie Yongji                 strerror(errno));
1301a6caeee8SXie Yongji         goto err_ctrl;
1302a6caeee8SXie Yongji     }
1303a6caeee8SXie Yongji 
1304a6caeee8SXie Yongji     version = VDUSE_API_VERSION;
1305a6caeee8SXie Yongji     if (ioctl(ctrl_fd, VDUSE_SET_API_VERSION, &version)) {
1306a6caeee8SXie Yongji         fprintf(stderr, "Failed to set api version %" PRIu64 ": %s\n",
1307a6caeee8SXie Yongji                 version, strerror(errno));
1308a6caeee8SXie Yongji         goto err_dev;
1309a6caeee8SXie Yongji     }
1310a6caeee8SXie Yongji 
1311a6caeee8SXie Yongji     dev_config = calloc(size + config_size, 1);
1312a6caeee8SXie Yongji     if (!dev_config) {
1313a6caeee8SXie Yongji         fprintf(stderr, "Failed to allocate config space\n");
1314a6caeee8SXie Yongji         goto err_dev;
1315a6caeee8SXie Yongji     }
1316a6caeee8SXie Yongji 
131757bc6e40SPhilippe Mathieu-Daudé     assert(!vduse_name_is_invalid(name));
131857bc6e40SPhilippe Mathieu-Daudé     strcpy(dev_config->name, name);
1319a6caeee8SXie Yongji     dev_config->device_id = device_id;
1320a6caeee8SXie Yongji     dev_config->vendor_id = vendor_id;
1321a6caeee8SXie Yongji     dev_config->features = features;
1322a6caeee8SXie Yongji     dev_config->vq_num = num_queues;
1323a6caeee8SXie Yongji     dev_config->vq_align = VDUSE_VQ_ALIGN;
1324a6caeee8SXie Yongji     dev_config->config_size = config_size;
1325a6caeee8SXie Yongji     memcpy(dev_config->config, config, config_size);
1326a6caeee8SXie Yongji 
1327a6caeee8SXie Yongji     ret = ioctl(ctrl_fd, VDUSE_CREATE_DEV, dev_config);
1328a6caeee8SXie Yongji     free(dev_config);
1329d043e2dbSXie Yongji     if (ret && errno != EEXIST) {
1330a6caeee8SXie Yongji         fprintf(stderr, "Failed to create vduse device %s: %s\n",
1331a6caeee8SXie Yongji                 name, strerror(errno));
1332a6caeee8SXie Yongji         goto err_dev;
1333a6caeee8SXie Yongji     }
1334a6caeee8SXie Yongji     dev->ctrl_fd = ctrl_fd;
1335a6caeee8SXie Yongji 
1336a6caeee8SXie Yongji     ret = vduse_dev_init(dev, name, num_queues, ops, priv);
1337a6caeee8SXie Yongji     if (ret < 0) {
1338a6caeee8SXie Yongji         fprintf(stderr, "Failed to init vduse device %s: %s\n",
1339630179b7SXie Yongji                 name, strerror(-ret));
1340a6caeee8SXie Yongji         goto err;
1341a6caeee8SXie Yongji     }
1342a6caeee8SXie Yongji 
1343a6caeee8SXie Yongji     return dev;
1344a6caeee8SXie Yongji err:
1345a6caeee8SXie Yongji     ioctl(ctrl_fd, VDUSE_DESTROY_DEV, name);
1346a6caeee8SXie Yongji err_dev:
1347a6caeee8SXie Yongji     close(ctrl_fd);
1348a6caeee8SXie Yongji err_ctrl:
1349a6caeee8SXie Yongji     free(dev);
1350a6caeee8SXie Yongji 
1351a6caeee8SXie Yongji     return NULL;
1352a6caeee8SXie Yongji }
1353a6caeee8SXie Yongji 
/*
 * Release every resource held by @dev and free the device itself.
 *
 * Teardown order: unmap the log area (if mapped), free each queue's
 * resubmit list and the queue array, close the device descriptor, then,
 * when a control descriptor is held (ctrl_fd >= 0), issue
 * VDUSE_DESTROY_DEV for dev->name and close that descriptor too.
 *
 * @dev must not be used after this call.
 *
 * Returns 0, or -errno when the VDUSE_DESTROY_DEV ioctl fails. Teardown
 * continues to completion even in that case.
 */
int vduse_dev_destroy(VduseDev *dev)
{
    /* Size of the log mapping covering all queues at maximum ring size. */
    size_t log_bytes = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    int rc = 0;
    int idx = 0;

    if (dev->log) {
        munmap(dev->log, log_bytes);
    }

    while (idx < dev->num_queues) {
        free(dev->vqs[idx].resubmit_list);
        idx++;
    }
    free(dev->vqs);

    if (dev->fd >= 0) {
        close(dev->fd);
        dev->fd = -1;
    }

    if (dev->ctrl_fd >= 0) {
        /* Remember the ioctl failure but keep tearing down. */
        rc = ioctl(dev->ctrl_fd, VDUSE_DESTROY_DEV, dev->name) ? -errno : 0;
        close(dev->ctrl_fd);
        dev->ctrl_fd = -1;
    }

    free(dev->name);
    free(dev);

    return rc;
}
1382