1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VDUSE: vDPA Device in Userspace
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11 #include "linux/virtio_net.h"
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34
35 #include "iova_domain.h"
36
/* Module metadata strings. */
#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

/* Size of the minor number space (1 << MINORBITS). */
#define VDUSE_DEV_MAX (1U << MINORBITS)
/* Bounce buffer size bounds and default, in bytes: 1 GiB, 1 MiB, 64 MiB. */
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
/* Default message timeout; presumably in seconds (msg_timeout is scaled by HZ). */
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

/*
 * Sentinel "effective CPU" meaning the irq work is not bound to a CPU.
 * Parenthesized so the negative constant is safe inside any expression.
 */
#define IRQ_UNBOUND (-1)
50
/* Per-virtqueue state of a VDUSE device. */
struct vduse_virtqueue {
	u16 index;			/* queue index, sent in VDUSE_GET_VQ_STATE requests */
	u16 num_max;			/* maximum queue size; 0 means the device is not ready */
	u32 num;			/* queue size set through set_vq_num (0 if unset) */
	u64 desc_addr;			/* descriptor area address */
	u64 driver_addr;		/* driver area address */
	u64 device_addr;		/* device area address */
	struct vdpa_vq_state state;	/* state stored by set_vq_state */
	bool ready;
	bool kicked;			/* a kick arrived while no kickfd was assigned */
	spinlock_t kick_lock;		/* taken around kicked/kickfd updates */
	spinlock_t irq_lock;		/* taken around cb updates and callbacks */
	struct eventfd_ctx *kickfd;	/* eventfd signalled on kick, may be NULL */
	struct vdpa_callback cb;	/* interrupt callback installed by the driver */
	struct work_struct inject;	/* deferred interrupt injection */
	struct work_struct kick;	/* deferred kick when eventfd signalling is not allowed */
	int irq_effective_cpu;		/* CPU for bound irq work, or IRQ_UNBOUND */
	struct cpumask irq_affinity;	/* candidate CPUs for irq_effective_cpu */
	struct kobject kobj;		/* NOTE(review): usage not visible in this chunk */
};
71
struct vduse_dev;

/* Wrapper tying a vdpa_device to the vduse_dev that backs it. */
struct vduse_vdpa {
	struct vdpa_device vdpa;	/* embedded; container_of() recovers the wrapper */
	struct vduse_dev *dev;		/* owning VDUSE device */
};
78
/* Userspace memory registered as bounce pages for a device. */
struct vduse_umem {
	unsigned long iova;	/* IOVA the region was registered at */
	unsigned long npages;	/* number of pinned pages in @pages */
	struct page **pages;	/* vmalloc'ed array of pinned pages */
	struct mm_struct *mm;	/* mm whose pinned_vm was charged; held via mmgrab() */
};
85
/* State of one VDUSE device. */
struct vduse_dev {
	struct vduse_vdpa *vdev;	/* attached vDPA wrapper; cleared in vduse_vdpa_free() */
	struct device *dev;
	struct vduse_virtqueue **vqs;	/* vq_num virtqueue pointers, indexed by queue index */
	struct vduse_iova_domain *domain;	/* IOVA domain used by the DMA ops */
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;		/* taken around send_list/recv_list/msg_unique accesses */
	u64 msg_unique;			/* next request id to hand out */
	u32 msg_timeout;		/* scaled by HZ when waiting; 0 waits without timeout */
	wait_queue_head_t waitq;	/* woken when a message is queued or the device breaks */
	struct list_head send_list;	/* messages waiting to be read by userspace */
	struct list_head recv_list;	/* messages read by userspace, awaiting a response */
	struct vdpa_callback config_cb;	/* config-change callback */
	struct work_struct inject;	/* deferred config interrupt injection */
	spinlock_t irq_lock;		/* taken around config_cb updates and callbacks */
	struct rw_semaphore rwsem;	/* write-held in reset, read-held when queuing irq work */
	int minor;
	bool broken;			/* set once a message timed out */
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;			/* incremented on every reset */
	u32 config_size;		/* size of @config in bytes */
	void *config;			/* read-only config space contents */
	u8 status;
	u32 vq_num;			/* number of entries in @vqs */
	u32 vq_align;
	struct vduse_umem *umem;	/* registered user bounce memory, or NULL */
	struct mutex mem_lock;		/* taken around @umem registration/deregistration */
	unsigned int bounce_size;
	struct mutex domain_lock;
};
122
/* One request/response exchange between the kernel and userspace. */
struct vduse_dev_msg {
	struct vduse_dev_request req;	/* copied to userspace on read */
	struct vduse_dev_response resp;	/* filled from userspace on write */
	struct list_head list;		/* link in dev->send_list or dev->recv_list */
	wait_queue_head_t waitq;	/* sender sleeps here until @completed */
	bool completed;			/* set when a response arrived or the message was failed */
};
130
/* NOTE(review): users of this struct are not visible in this chunk. */
struct vduse_control {
	u64 api_version;
};
134
/* File-scope state; NOTE(review): users of the lock/idr/cdevs are outside this chunk. */
static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
/* Workqueue used for irq work when the effective CPU is IRQ_UNBOUND. */
static struct workqueue_struct *vduse_irq_wq;
/* Workqueue used with queue_work_on() for a specific effective CPU. */
static struct workqueue_struct *vduse_irq_bound_wq;

/* Virtio device IDs; presumably the set accepted at device creation (TODO confirm). */
static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
	VIRTIO_ID_NET,
};
148
vdpa_to_vduse(struct vdpa_device * vdpa)149 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
150 {
151 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
152
153 return vdev->dev;
154 }
155
/* Resolve the vduse_dev behind a generic struct device. */
static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	return vdpa_to_vduse(dev_to_vdpa(dev));
}
162
/*
 * Look up the message with @request_id on @head.
 *
 * On a match the message is unlinked from the list and returned;
 * NULL is returned when no entry matches.
 */
static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *cur;

	list_for_each_entry(cur, head, list) {
		if (cur->req.request_id != request_id)
			continue;

		list_del(&cur->list);
		return cur;
	}

	return NULL;
}
177
vduse_dequeue_msg(struct list_head * head)178 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
179 {
180 struct vduse_dev_msg *msg = NULL;
181
182 if (!list_empty(head)) {
183 msg = list_first_entry(head, struct vduse_dev_msg, list);
184 list_del(&msg->list);
185 }
186
187 return msg;
188 }
189
vduse_enqueue_msg(struct list_head * head,struct vduse_dev_msg * msg)190 static void vduse_enqueue_msg(struct list_head *head,
191 struct vduse_dev_msg *msg)
192 {
193 list_add_tail(&msg->list, head);
194 }
195
vduse_dev_broken(struct vduse_dev * dev)196 static void vduse_dev_broken(struct vduse_dev *dev)
197 {
198 struct vduse_dev_msg *msg, *tmp;
199
200 if (unlikely(dev->broken))
201 return;
202
203 list_splice_init(&dev->recv_list, &dev->send_list);
204 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
205 list_del(&msg->list);
206 msg->completed = 1;
207 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
208 wake_up(&msg->waitq);
209 }
210 dev->broken = true;
211 wake_up(&dev->waitq);
212 }
213
/*
 * Send @msg to userspace and wait for the response.
 *
 * A request id is assigned from dev->msg_unique, the message is queued on
 * dev->send_list and readers on dev->waitq are woken. The caller then sleeps
 * (killable) until msg->completed is set, bounded by dev->msg_timeout * HZ
 * when msg_timeout is non-zero.
 *
 * Returns 0 if the response result is VDUSE_REQ_RESULT_OK, -EIO otherwise
 * (including when the device is already marked broken).
 */
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	/* Cheap unlocked check; repeated under msg_lock below. */
	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		/* Not answered: unlink the message from whichever list holds it. */
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunction when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}
251
vduse_dev_get_vq_state_packed(struct vduse_dev * dev,struct vduse_virtqueue * vq,struct vdpa_vq_state_packed * packed)252 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
253 struct vduse_virtqueue *vq,
254 struct vdpa_vq_state_packed *packed)
255 {
256 struct vduse_dev_msg msg = { 0 };
257 int ret;
258
259 msg.req.type = VDUSE_GET_VQ_STATE;
260 msg.req.vq_state.index = vq->index;
261
262 ret = vduse_dev_msg_sync(dev, &msg);
263 if (ret)
264 return ret;
265
266 packed->last_avail_counter =
267 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
268 packed->last_avail_idx =
269 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
270 packed->last_used_counter =
271 msg.resp.vq_state.packed.last_used_counter & 0x0001;
272 packed->last_used_idx =
273 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
274
275 return 0;
276 }
277
vduse_dev_get_vq_state_split(struct vduse_dev * dev,struct vduse_virtqueue * vq,struct vdpa_vq_state_split * split)278 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
279 struct vduse_virtqueue *vq,
280 struct vdpa_vq_state_split *split)
281 {
282 struct vduse_dev_msg msg = { 0 };
283 int ret;
284
285 msg.req.type = VDUSE_GET_VQ_STATE;
286 msg.req.vq_state.index = vq->index;
287
288 ret = vduse_dev_msg_sync(dev, &msg);
289 if (ret)
290 return ret;
291
292 split->avail_index = msg.resp.vq_state.split.avail_index;
293
294 return 0;
295 }
296
/* Send a VDUSE_SET_STATUS request carrying @status; returns 0 or -EIO. */
static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.s.status = status;
	msg.req.type = VDUSE_SET_STATUS;

	return vduse_dev_msg_sync(dev, &msg);
}
306
/*
 * Send a VDUSE_UPDATE_IOTLB request for the IOVA range [@start, @last].
 *
 * Returns -EINVAL for an inverted range, otherwise the result of
 * vduse_dev_msg_sync().
 */
static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (start > last)
		return -EINVAL;

	msg.req.iova.last = last;
	msg.req.iova.start = start;
	msg.req.type = VDUSE_UPDATE_IOTLB;

	return vduse_dev_msg_sync(dev, &msg);
}
321
/*
 * read() handler: hand the next pending request to userspace.
 *
 * Dequeues the first message from dev->send_list (sleeping interruptibly
 * unless the file is O_NONBLOCK), copies its request to @to and moves the
 * message to dev->recv_list to await the response.
 *
 * Returns the number of bytes copied, -EINVAL if the buffer is smaller than
 * a request, -EAGAIN when non-blocking and nothing is pending, -EFAULT on a
 * short copy (the message is put back on send_list), or the error from the
 * interrupted wait.
 */
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		/* Drop the spinlock before sleeping; re-check the list after waking. */
		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	/* The copy is done with msg_lock released. */
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}
365
/* Return true when the first @size bytes at @ptr are all zero (or @size <= 0). */
static bool is_mem_zero(const char *ptr, int size)
{
	const char *p = ptr;
	int remaining;

	for (remaining = size; remaining > 0; remaining--, p++) {
		if (*p != 0)
			return false;
	}

	return true;
}
376
vduse_dev_write_iter(struct kiocb * iocb,struct iov_iter * from)377 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
378 {
379 struct file *file = iocb->ki_filp;
380 struct vduse_dev *dev = file->private_data;
381 struct vduse_dev_response resp;
382 struct vduse_dev_msg *msg;
383 size_t ret;
384
385 ret = copy_from_iter(&resp, sizeof(resp), from);
386 if (ret != sizeof(resp))
387 return -EINVAL;
388
389 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
390 return -EINVAL;
391
392 spin_lock(&dev->msg_lock);
393 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
394 if (!msg) {
395 ret = -ENOENT;
396 goto unlock;
397 }
398
399 memcpy(&msg->resp, &resp, sizeof(resp));
400 msg->completed = 1;
401 wake_up(&msg->waitq);
402 unlock:
403 spin_unlock(&dev->msg_lock);
404
405 return ret;
406 }
407
vduse_dev_poll(struct file * file,poll_table * wait)408 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
409 {
410 struct vduse_dev *dev = file->private_data;
411 __poll_t mask = 0;
412
413 poll_wait(file, &dev->waitq, wait);
414
415 spin_lock(&dev->msg_lock);
416
417 if (unlikely(dev->broken))
418 mask |= EPOLLERR;
419 if (!list_empty(&dev->send_list))
420 mask |= EPOLLIN | EPOLLRDNORM;
421 if (!list_empty(&dev->recv_list))
422 mask |= EPOLLOUT | EPOLLWRNORM;
423
424 spin_unlock(&dev->msg_lock);
425
426 return mask;
427 }
428
/*
 * Return @dev and all of its virtqueues to their initial state.
 *
 * Resets the bounce map (if any), then, with dev->rwsem held for writing,
 * clears status and driver features, bumps the config generation, drops the
 * config and per-queue callbacks, releases each queue's kickfd and flushes
 * the pending inject/kick work items.
 */
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain && domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	/* Wait for any config irq work that is already queued or running. */
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		vq->cb.trigger = NULL;
		spin_unlock(&vq->irq_lock);
		/* Callbacks are cleared above, so flushed work finds nothing to call. */
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}
477
/* Record the descriptor, driver and device area addresses of queue @idx. */
static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];

	vq->device_addr = device_area;
	vq->driver_addr = driver_area;
	vq->desc_addr = desc_area;

	return 0;
}
491
vduse_vq_kick(struct vduse_virtqueue * vq)492 static void vduse_vq_kick(struct vduse_virtqueue *vq)
493 {
494 spin_lock(&vq->kick_lock);
495 if (!vq->ready)
496 goto unlock;
497
498 if (vq->kickfd)
499 eventfd_signal(vq->kickfd);
500 else
501 vq->kicked = true;
502 unlock:
503 spin_unlock(&vq->kick_lock);
504 }
505
vduse_vq_kick_work(struct work_struct * work)506 static void vduse_vq_kick_work(struct work_struct *work)
507 {
508 struct vduse_virtqueue *vq = container_of(work,
509 struct vduse_virtqueue, kick);
510
511 vduse_vq_kick(vq);
512 }
513
/*
 * Kick queue @idx: directly when eventfd signalling is allowed in the
 * current context, otherwise deferred to the queue's kick work item.
 */
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];

	if (eventfd_signal_allowed())
		vduse_vq_kick(vq);
	else
		schedule_work(&vq->kick);
}
525
/* Install the callback, private data and trigger of @cb on queue @idx. */
static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];
	struct vdpa_callback *dst = &vq->cb;

	spin_lock(&vq->irq_lock);
	dst->callback = cb->callback;
	dst->private = cb->private;
	dst->trigger = cb->trigger;
	spin_unlock(&vq->irq_lock);
}
538
/* Store the queue size @num for queue @idx. */
static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	vdpa_to_vduse(vdpa)->vqs[idx]->num = num;
}
546
/* Return the configured size of queue @idx, or its maximum size if unset. */
static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];

	return vq->num ? vq->num : vq->num_max;
}
557
/* Set the ready flag of queue @idx. */
static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	vdpa_to_vduse(vdpa)->vqs[idx]->ready = ready;
}
566
vduse_vdpa_get_vq_ready(struct vdpa_device * vdpa,u16 idx)567 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
568 {
569 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
570 struct vduse_virtqueue *vq = dev->vqs[idx];
571
572 return vq->ready;
573 }
574
/*
 * Store @state for queue @idx, using the packed layout when the driver
 * negotiated VIRTIO_F_RING_PACKED and the split layout otherwise.
 */
static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!(dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))) {
		vq->state.split.avail_index = state->split.avail_index;
		return 0;
	}

	vq->state.packed.last_avail_counter = state->packed.last_avail_counter;
	vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
	vq->state.packed.last_used_counter = state->packed.last_used_counter;
	vq->state.packed.last_used_idx = state->packed.last_used_idx;

	return 0;
}
593
/*
 * Query userspace for the state of queue @idx, in packed or split form
 * depending on whether VIRTIO_F_RING_PACKED was negotiated.
 */
static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];

	if (!(dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)))
		return vduse_dev_get_vq_state_split(dev, vq, &state->split);

	return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
}
605
vduse_vdpa_get_vq_align(struct vdpa_device * vdpa)606 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
607 {
608 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
609
610 return dev->vq_align;
611 }
612
vduse_vdpa_get_device_features(struct vdpa_device * vdpa)613 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
614 {
615 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
616
617 return dev->device_features;
618 }
619
/* Record the feature bits accepted by the driver; always succeeds. */
static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	vdpa_to_vduse(vdpa)->driver_features = features;

	return 0;
}
627
vduse_vdpa_get_driver_features(struct vdpa_device * vdpa)628 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
629 {
630 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
631
632 return dev->driver_features;
633 }
634
vduse_vdpa_set_config_cb(struct vdpa_device * vdpa,struct vdpa_callback * cb)635 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
636 struct vdpa_callback *cb)
637 {
638 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
639
640 spin_lock(&dev->irq_lock);
641 dev->config_cb.callback = cb->callback;
642 dev->config_cb.private = cb->private;
643 spin_unlock(&dev->irq_lock);
644 }
645
vduse_vdpa_get_vq_num_max(struct vdpa_device * vdpa)646 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
647 {
648 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
649 u16 num_max = 0;
650 int i;
651
652 for (i = 0; i < dev->vq_num; i++)
653 if (num_max < dev->vqs[i]->num_max)
654 num_max = dev->vqs[i]->num_max;
655
656 return num_max;
657 }
658
vduse_vdpa_get_device_id(struct vdpa_device * vdpa)659 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
660 {
661 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
662
663 return dev->device_id;
664 }
665
vduse_vdpa_get_vendor_id(struct vdpa_device * vdpa)666 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
667 {
668 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
669
670 return dev->vendor_id;
671 }
672
vduse_vdpa_get_status(struct vdpa_device * vdpa)673 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
674 {
675 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
676
677 return dev->status;
678 }
679
/*
 * Forward @status to userspace; the cached status is updated only when
 * the request succeeds.
 */
static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (!vduse_dev_set_status(dev, status))
		dev->status = status;
}
689
vduse_vdpa_get_config_size(struct vdpa_device * vdpa)690 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
691 {
692 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
693
694 return dev->config_size;
695 }
696
vduse_vdpa_get_config(struct vdpa_device * vdpa,unsigned int offset,void * buf,unsigned int len)697 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
698 void *buf, unsigned int len)
699 {
700 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
701
702 /* Initialize the buffer in case of partial copy. */
703 memset(buf, 0, len);
704
705 if (offset > dev->config_size)
706 return;
707
708 if (len > dev->config_size - offset)
709 len = dev->config_size - offset;
710
711 memcpy(buf, dev->config + offset, len);
712 }
713
/* Config space writes are silently ignored. */
static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}
719
/*
 * Reset the device: tell userspace status 0, then reset the kernel-side
 * state regardless of the outcome. Returns the result of the status request.
 */
static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_dev_set_status(dev, 0);
	vduse_dev_reset(dev);

	return ret;
}
729
vduse_vdpa_get_generation(struct vdpa_device * vdpa)730 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
731 {
732 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
733
734 return dev->generation;
735 }
736
/*
 * Set the irq affinity mask of queue @idx to @cpu_mask, or to all CPUs
 * when @cpu_mask is NULL. Always returns 0.
 */
static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct cpumask *affinity = &vdpa_to_vduse(vdpa)->vqs[idx]->irq_affinity;

	if (!cpu_mask)
		cpumask_setall(affinity);
	else
		cpumask_copy(affinity, cpu_mask);

	return 0;
}
749
750 static const struct cpumask *
vduse_vdpa_get_vq_affinity(struct vdpa_device * vdpa,u16 idx)751 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
752 {
753 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
754
755 return &dev->vqs[idx]->irq_affinity;
756 }
757
vduse_vdpa_set_map(struct vdpa_device * vdpa,unsigned int asid,struct vhost_iotlb * iotlb)758 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
759 unsigned int asid,
760 struct vhost_iotlb *iotlb)
761 {
762 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
763 int ret;
764
765 ret = vduse_domain_set_map(dev->domain, iotlb);
766 if (ret)
767 return ret;
768
769 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
770 if (ret) {
771 vduse_domain_clear_map(dev->domain, iotlb);
772 return ret;
773 }
774
775 return 0;
776 }
777
vduse_vdpa_free(struct vdpa_device * vdpa)778 static void vduse_vdpa_free(struct vdpa_device *vdpa)
779 {
780 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
781
782 dev->vdev = NULL;
783 }
784
/* vDPA config operations implemented by VDUSE. */
static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address		= vduse_vdpa_set_vq_address,
	.kick_vq		= vduse_vdpa_kick_vq,
	.set_vq_cb		= vduse_vdpa_set_vq_cb,
	.set_vq_num		= vduse_vdpa_set_vq_num,
	.get_vq_size		= vduse_vdpa_get_vq_size,
	.set_vq_ready		= vduse_vdpa_set_vq_ready,
	.get_vq_ready		= vduse_vdpa_get_vq_ready,
	.set_vq_state		= vduse_vdpa_set_vq_state,
	.get_vq_state		= vduse_vdpa_get_vq_state,
	.get_vq_align		= vduse_vdpa_get_vq_align,
	.get_device_features	= vduse_vdpa_get_device_features,
	.set_driver_features	= vduse_vdpa_set_driver_features,
	.get_driver_features	= vduse_vdpa_get_driver_features,
	.set_config_cb		= vduse_vdpa_set_config_cb,
	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
	.get_device_id		= vduse_vdpa_get_device_id,
	.get_vendor_id		= vduse_vdpa_get_vendor_id,
	.get_status		= vduse_vdpa_get_status,
	.set_status		= vduse_vdpa_set_status,
	.get_config_size	= vduse_vdpa_get_config_size,
	.get_config		= vduse_vdpa_get_config,
	.set_config		= vduse_vdpa_set_config,
	.get_generation		= vduse_vdpa_get_generation,
	.set_vq_affinity	= vduse_vdpa_set_vq_affinity,
	.get_vq_affinity	= vduse_vdpa_get_vq_affinity,
	.reset			= vduse_vdpa_reset,
	.set_map		= vduse_vdpa_set_map,
	.free			= vduse_vdpa_free,
};
815
/* DMA op: forward sync-for-device to the device's IOVA domain. */
static void vduse_dev_sync_single_for_device(struct device *dev,
					     dma_addr_t dma_addr, size_t size,
					     enum dma_data_direction dir)
{
	struct vduse_iova_domain *domain = dev_to_vduse(dev)->domain;

	vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
}
825
/* DMA op: forward sync-for-cpu to the device's IOVA domain. */
static void vduse_dev_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dma_addr, size_t size,
					  enum dma_data_direction dir)
{
	struct vduse_iova_domain *domain = dev_to_vduse(dev)->domain;

	vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
}
835
vduse_dev_map_page(struct device * dev,struct page * page,unsigned long offset,size_t size,enum dma_data_direction dir,unsigned long attrs)836 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
837 unsigned long offset, size_t size,
838 enum dma_data_direction dir,
839 unsigned long attrs)
840 {
841 struct vduse_dev *vdev = dev_to_vduse(dev);
842 struct vduse_iova_domain *domain = vdev->domain;
843
844 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
845 }
846
/* DMA op: unmap @dma_addr through the device's IOVA domain. */
static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	/* Plain call: a void function must not 'return' an expression. */
	vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}
856
/*
 * DMA op: allocate coherent memory from the device's IOVA domain.
 *
 * Returns the kernel virtual address and stores the DMA address in
 * @dma_addr. On failure returns NULL with *@dma_addr set to
 * DMA_MAPPING_ERROR.
 */
static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	/*
	 * Use a real dma_addr_t: casting the address of an unsigned long
	 * would let the callee write past it when dma_addr_t is wider.
	 */
	dma_addr_t iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size, &iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = iova;

	return addr;
}
876
/* DMA op: release coherent memory through the device's IOVA domain. */
static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_iova_domain *domain = dev_to_vduse(dev)->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}
886
vduse_dev_max_mapping_size(struct device * dev)887 static size_t vduse_dev_max_mapping_size(struct device *dev)
888 {
889 struct vduse_dev *vdev = dev_to_vduse(dev);
890 struct vduse_iova_domain *domain = vdev->domain;
891
892 return domain->bounce_size;
893 }
894
/* DMA mapping operations, all routed to the device's vduse_iova_domain. */
static const struct dma_map_ops vduse_dev_dma_ops = {
	.sync_single_for_device	= vduse_dev_sync_single_for_device,
	.sync_single_for_cpu	= vduse_dev_sync_single_for_cpu,
	.map_page		= vduse_dev_map_page,
	.unmap_page		= vduse_dev_unmap_page,
	.alloc			= vduse_dev_alloc_coherent,
	.free			= vduse_dev_free_coherent,
	.max_mapping_size	= vduse_dev_max_mapping_size,
};
904
/*
 * Translate a VDUSE_ACCESS_* permission into the matching O_* open flag.
 * An unknown permission triggers a WARN and yields 0.
 */
static unsigned int perm_to_file_flags(u8 perm)
{
	switch (perm) {
	case VDUSE_ACCESS_RO:
		return O_RDONLY;
	case VDUSE_ACCESS_WO:
		return O_WRONLY;
	case VDUSE_ACCESS_RW:
		return O_RDWR;
	default:
		WARN(1, "invalidate vhost IOTLB permission\n");
		return 0;
	}
}
926
vduse_kickfd_setup(struct vduse_dev * dev,struct vduse_vq_eventfd * eventfd)927 static int vduse_kickfd_setup(struct vduse_dev *dev,
928 struct vduse_vq_eventfd *eventfd)
929 {
930 struct eventfd_ctx *ctx = NULL;
931 struct vduse_virtqueue *vq;
932 u32 index;
933
934 if (eventfd->index >= dev->vq_num)
935 return -EINVAL;
936
937 index = array_index_nospec(eventfd->index, dev->vq_num);
938 vq = dev->vqs[index];
939 if (eventfd->fd >= 0) {
940 ctx = eventfd_ctx_fdget(eventfd->fd);
941 if (IS_ERR(ctx))
942 return PTR_ERR(ctx);
943 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
944 return 0;
945
946 spin_lock(&vq->kick_lock);
947 if (vq->kickfd)
948 eventfd_ctx_put(vq->kickfd);
949 vq->kickfd = ctx;
950 if (vq->ready && vq->kicked && vq->kickfd) {
951 eventfd_signal(vq->kickfd);
952 vq->kicked = false;
953 }
954 spin_unlock(&vq->kick_lock);
955
956 return 0;
957 }
958
vduse_dev_is_ready(struct vduse_dev * dev)959 static bool vduse_dev_is_ready(struct vduse_dev *dev)
960 {
961 int i;
962
963 for (i = 0; i < dev->vq_num; i++)
964 if (!dev->vqs[i]->num_max)
965 return false;
966
967 return true;
968 }
969
vduse_dev_irq_inject(struct work_struct * work)970 static void vduse_dev_irq_inject(struct work_struct *work)
971 {
972 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
973
974 spin_lock_bh(&dev->irq_lock);
975 if (dev->config_cb.callback)
976 dev->config_cb.callback(dev->config_cb.private);
977 spin_unlock_bh(&dev->irq_lock);
978 }
979
vduse_vq_irq_inject(struct work_struct * work)980 static void vduse_vq_irq_inject(struct work_struct *work)
981 {
982 struct vduse_virtqueue *vq = container_of(work,
983 struct vduse_virtqueue, inject);
984
985 spin_lock_bh(&vq->irq_lock);
986 if (vq->ready && vq->cb.callback)
987 vq->cb.callback(vq->cb.private);
988 spin_unlock_bh(&vq->irq_lock);
989 }
990
vduse_vq_signal_irqfd(struct vduse_virtqueue * vq)991 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
992 {
993 bool signal = false;
994
995 if (!vq->cb.trigger)
996 return false;
997
998 spin_lock_irq(&vq->irq_lock);
999 if (vq->ready && vq->cb.trigger) {
1000 eventfd_signal(vq->cb.trigger);
1001 signal = true;
1002 }
1003 spin_unlock_irq(&vq->irq_lock);
1004
1005 return signal;
1006 }
1007
vduse_dev_queue_irq_work(struct vduse_dev * dev,struct work_struct * irq_work,int irq_effective_cpu)1008 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
1009 struct work_struct *irq_work,
1010 int irq_effective_cpu)
1011 {
1012 int ret = -EINVAL;
1013
1014 down_read(&dev->rwsem);
1015 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1016 goto unlock;
1017
1018 ret = 0;
1019 if (irq_effective_cpu == IRQ_UNBOUND)
1020 queue_work(vduse_irq_wq, irq_work);
1021 else
1022 queue_work_on(irq_effective_cpu,
1023 vduse_irq_bound_wq, irq_work);
1024 unlock:
1025 up_read(&dev->rwsem);
1026
1027 return ret;
1028 }
1029
/*
 * Deregister the user bounce memory of @dev.
 *
 * @iova and @size must match the registered region (size equal to the
 * domain's bounce size). The user pages are removed from the domain,
 * unpinned (marked dirty), uncharged from the mm's pinned_vm and freed.
 *
 * Returns 0 on success, -ENOENT if nothing is registered, -EINVAL if there
 * is no domain or the range does not match.
 */
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	struct vduse_umem *umem;
	int ret;

	mutex_lock(&dev->mem_lock);
	umem = dev->umem;
	if (!umem) {
		ret = -ENOENT;
		goto unlock;
	}

	if (!dev->domain ||
	    umem->iova != iova || size != dev->domain->bounce_size) {
		ret = -EINVAL;
		goto unlock;
	}

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(umem->pages, umem->npages, true);
	atomic64_sub(umem->npages, &umem->mm->pinned_vm);
	mmdrop(umem->mm);
	vfree(umem->pages);
	kfree(umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}
1060
/*
 * Register userspace memory as the bounce pages of @dev.
 *
 * The region must start at IOVA 0, be page aligned in userspace and be
 * exactly as large as the domain's bounce buffer. The pages are pinned
 * long-term, handed to the IOVA domain and charged to the caller's
 * mm->pinned_vm, subject to RLIMIT_MEMLOCK.
 *
 * Returns 0 on success, -EINVAL for bad arguments or a missing bounce map,
 * -EEXIST if memory is already registered, -ENOMEM on allocation failure,
 * when the memlock limit would be exceeded or when fewer pages than
 * requested were pinned, or the error from pin_user_pages() /
 * vduse_domain_add_user_bounce_pages().
 */
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain || !dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	/* Refuse to exceed the caller's locked-memory limit (ret is -ENOMEM). */
	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	/* Keep the mm_struct alive so deregistration can uncharge pinned_vm. */
	mmgrab(current->mm);

	dev->umem = umem;
out:
	/* Undo a partial or unused pin on failure. */
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}
1128
vduse_vq_update_effective_cpu(struct vduse_virtqueue * vq)1129 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1130 {
1131 int curr_cpu = vq->irq_effective_cpu;
1132
1133 while (true) {
1134 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1135 if (cpu_online(curr_cpu))
1136 break;
1137
1138 if (curr_cpu >= nr_cpu_ids)
1139 curr_cpu = IRQ_UNBOUND;
1140 }
1141
1142 vq->irq_effective_cpu = curr_cpu;
1143 }
1144
/*
 * ioctl handler for a VDUSE device file (file->private_data is the
 * struct vduse_dev).
 *
 * Returns -EPERM for every command once dev->broken is set, -ENOIOCTLCMD for
 * unknown commands, and otherwise the per-command result: typically 0 on
 * success, -EFAULT when a user copy fails and -EINVAL when an argument fails
 * validation. VDUSE_IOTLB_GET_FD returns the value of receive_fd().
 */
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	/*
	 * Look up the first IOTLB mapping overlapping [start, last], report
	 * its range/offset/perm back to userspace and install an fd for the
	 * file backing it.
	 */
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			/* Take a file reference while still under iotlb_lock. */
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
		/* Drop the reference taken by get_file() above. */
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	/*
	 * Copy config.length bytes from userspace into dev->config at
	 * config.offset, after checking the range against dev->config_size.
	 */
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		/* Written as a subtraction so offset + length cannot wrap. */
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	/* Queue the device-level inject work on the unbound workqueue. */
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
		break;
	/* Set the maximum size (num_max) of one virtqueue. */
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index]->num_max = config.max_size;
		ret = 0;
		break;
	}
	/* Report addresses, size, ring state and readiness of one virtqueue. */
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		/* Which state layout is reported depends on the negotiated ring format. */
		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	/* Hand the eventfd description to vduse_kickfd_setup(). */
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	/*
	 * Inject an interrupt for one virtqueue: try vduse_vq_signal_irqfd()
	 * first and, if it returns false, pick the next effective CPU and
	 * queue the virtqueue's inject work.
	 */
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		ret = 0;
		index = array_index_nospec(index, dev->vq_num);
		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
			vduse_vq_update_effective_cpu(dev->vqs[index]);
			ret = vduse_dev_queue_irq_work(dev,
						&dev->vqs[index]->inject,
						dev->vqs[index]->irq_effective_cpu);
		}
		break;
	}
	/* Register userspace memory for the bounce area (domain_lock held). */
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	/* Unregister previously registered userspace memory (domain_lock held). */
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;
		mutex_lock(&dev->domain_lock);
		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		mutex_unlock(&dev->domain_lock);
		break;
	}
	/*
	 * Report the range of the first IOTLB mapping overlapping
	 * [start, last] and whether it can take registered userspace memory.
	 */
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		mutex_lock(&dev->domain_lock);
		if (!dev->domain) {
			mutex_unlock(&dev->domain_lock);
			break;
		}
		spin_lock(&dev->domain->iotlb_lock);
		map = vhost_iotlb_itree_first(dev->domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			/* UMEM capability only for the mapping spanning [0, bounce_size - 1]. */
			if (dev->domain->bounce_map && map->start == 0 &&
			    map->last == dev->domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&dev->domain->iotlb_lock);
		mutex_unlock(&dev->domain_lock);
		/* ret is still -EINVAL when no mapping was found. */
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
1414
/*
 * Release handler for a VDUSE device file: unregisters any userspace bounce
 * memory, requeues in-flight messages and marks the device as disconnected
 * so it can be opened again. Always returns 0.
 */
static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	/* The result of the deregistration is intentionally not checked. */
	mutex_lock(&dev->domain_lock);
	if (dev->domain)
		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	mutex_unlock(&dev->domain_lock);
	spin_lock(&dev->msg_lock);
	/* Make sure the in-flight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}
1431
vduse_dev_get_from_minor(int minor)1432 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1433 {
1434 struct vduse_dev *dev;
1435
1436 mutex_lock(&vduse_lock);
1437 dev = idr_find(&vduse_idr, minor);
1438 mutex_unlock(&vduse_lock);
1439
1440 return dev;
1441 }
1442
vduse_dev_open(struct inode * inode,struct file * file)1443 static int vduse_dev_open(struct inode *inode, struct file *file)
1444 {
1445 int ret;
1446 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1447
1448 if (!dev)
1449 return -ENODEV;
1450
1451 ret = -EBUSY;
1452 mutex_lock(&dev->lock);
1453 if (dev->connected)
1454 goto unlock;
1455
1456 ret = 0;
1457 dev->connected = true;
1458 file->private_data = dev;
1459 unlock:
1460 mutex_unlock(&dev->lock);
1461
1462 return ret;
1463 }
1464
/*
 * File operations installed on vduse_cdev, i.e. the per-device nodes
 * (/dev/vduse/$DEVICE).
 */
static const struct file_operations vduse_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_dev_open,
	.release	= vduse_dev_release,
	.read_iter	= vduse_dev_read_iter,
	.write_iter	= vduse_dev_write_iter,
	.poll		= vduse_dev_poll,
	.unlocked_ioctl	= vduse_dev_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};
1476
irq_cb_affinity_show(struct vduse_virtqueue * vq,char * buf)1477 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1478 {
1479 return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1480 }
1481
irq_cb_affinity_store(struct vduse_virtqueue * vq,const char * buf,size_t count)1482 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1483 const char *buf, size_t count)
1484 {
1485 cpumask_var_t new_value;
1486 int ret;
1487
1488 if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1489 return -ENOMEM;
1490
1491 ret = cpumask_parse(buf, new_value);
1492 if (ret)
1493 goto free_mask;
1494
1495 ret = -EINVAL;
1496 if (!cpumask_intersects(new_value, cpu_online_mask))
1497 goto free_mask;
1498
1499 cpumask_copy(&vq->irq_affinity, new_value);
1500 ret = count;
1501 free_mask:
1502 free_cpumask_var(new_value);
1503 return ret;
1504 }
1505
/*
 * A sysfs attribute of a virtqueue kobject together with its typed
 * show/store callbacks; vq_attr_show()/vq_attr_store() dispatch to these.
 */
struct vq_sysfs_entry {
	struct attribute attr;
	/* Formats the attribute into buf; may be NULL (show then fails with -EIO). */
	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
	/* Parses count bytes from buf; may be NULL (store then fails with -EIO). */
	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
			 size_t count);
};
1512
/* Read-write attribute backed by irq_cb_affinity_show()/irq_cb_affinity_store(). */
static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);

/* NULL-terminated attribute list for virtqueue kobjects; exposed as vq_groups. */
static struct attribute *vq_attrs[] = {
	&irq_cb_affinity_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vq);
1520
vq_attr_show(struct kobject * kobj,struct attribute * attr,char * buf)1521 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1522 char *buf)
1523 {
1524 struct vduse_virtqueue *vq = container_of(kobj,
1525 struct vduse_virtqueue, kobj);
1526 struct vq_sysfs_entry *entry = container_of(attr,
1527 struct vq_sysfs_entry, attr);
1528
1529 if (!entry->show)
1530 return -EIO;
1531
1532 return entry->show(vq, buf);
1533 }
1534
vq_attr_store(struct kobject * kobj,struct attribute * attr,const char * buf,size_t count)1535 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1536 const char *buf, size_t count)
1537 {
1538 struct vduse_virtqueue *vq = container_of(kobj,
1539 struct vduse_virtqueue, kobj);
1540 struct vq_sysfs_entry *entry = container_of(attr,
1541 struct vq_sysfs_entry, attr);
1542
1543 if (!entry->store)
1544 return -EIO;
1545
1546 return entry->store(vq, buf, count);
1547 }
1548
/* sysfs show/store dispatchers used by vq_type. */
static const struct sysfs_ops vq_sysfs_ops = {
	.show = vq_attr_show,
	.store = vq_attr_store,
};
1553
vq_release(struct kobject * kobj)1554 static void vq_release(struct kobject *kobj)
1555 {
1556 struct vduse_virtqueue *vq = container_of(kobj,
1557 struct vduse_virtqueue, kobj);
1558 kfree(vq);
1559 }
1560
/*
 * kobject type of a virtqueue: vq_release() frees it, vq_sysfs_ops handles
 * attribute access, and vq_groups lists the default attributes.
 */
static const struct kobj_type vq_type = {
	.release	= vq_release,
	.sysfs_ops	= &vq_sysfs_ops,
	.default_groups	= vq_groups,
};
1566
/*
 * devnode callback of vduse_class: builds the node name "vduse/<dev name>".
 * @mode is not touched.
 *
 * Returns the kasprintf()-allocated string (NULL on allocation failure).
 */
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	const char *name = dev_name(dev);

	return kasprintf(GFP_KERNEL, "vduse/%s", name);
}
1571
/* Device class "vduse"; device nodes are named by vduse_devnode(). */
static const struct class vduse_class = {
	.name = "vduse",
	.devnode = vduse_devnode,
};
1576
vduse_dev_deinit_vqs(struct vduse_dev * dev)1577 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1578 {
1579 int i;
1580
1581 if (!dev->vqs)
1582 return;
1583
1584 for (i = 0; i < dev->vq_num; i++)
1585 kobject_put(&dev->vqs[i]->kobj);
1586 kfree(dev->vqs);
1587 }
1588
/*
 * Allocate and initialise the virtqueues of @dev and add one kobject
 * ("vq<N>") per virtqueue under the device's kobject.
 *
 * @dev: device to set up; dev->dev must already exist
 * @vq_align: stored in dev->vq_align
 * @vq_num: number of virtqueues to create; stored in dev->vq_num
 *
 * On failure every virtqueue created so far is released and dev->vqs is
 * reset to NULL.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error from
 * kobject_add().
 */
static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int ret, i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
		if (!dev->vqs[i]) {
			ret = -ENOMEM;
			goto err;
		}

		dev->vqs[i]->index = i;
		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i]->kick_lock);
		spin_lock_init(&dev->vqs[i]->irq_lock);
		cpumask_setall(&dev->vqs[i]->irq_affinity);

		kobject_init(&dev->vqs[i]->kobj, &vq_type);
		ret = kobject_add(&dev->vqs[i]->kobj,
				  &dev->dev->kobj, "vq%d", i);
		if (ret) {
			/*
			 * The kobject is initialised, so it must be dropped
			 * with kobject_put(): that releases what
			 * kobject_add() set up and frees the virtqueue
			 * through vq_release(). A bare kfree() would leak
			 * the kobject's own resources.
			 */
			kobject_put(&dev->vqs[i]->kobj);
			goto err;
		}
	}

	return 0;
err:
	/* Release the virtqueues that were fully set up before the failure. */
	while (i--)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return ret;
}
1631
vduse_dev_create(void)1632 static struct vduse_dev *vduse_dev_create(void)
1633 {
1634 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1635
1636 if (!dev)
1637 return NULL;
1638
1639 mutex_init(&dev->lock);
1640 mutex_init(&dev->mem_lock);
1641 mutex_init(&dev->domain_lock);
1642 spin_lock_init(&dev->msg_lock);
1643 INIT_LIST_HEAD(&dev->send_list);
1644 INIT_LIST_HEAD(&dev->recv_list);
1645 spin_lock_init(&dev->irq_lock);
1646 init_rwsem(&dev->rwsem);
1647
1648 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1649 init_waitqueue_head(&dev->waitq);
1650
1651 return dev;
1652 }
1653
/*
 * Free the vduse_dev structure itself. Members such as the name, config
 * buffer and virtqueues are not touched here; callers in this file release
 * them separately.
 */
static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}
1658
vduse_find_dev(const char * name)1659 static struct vduse_dev *vduse_find_dev(const char *name)
1660 {
1661 struct vduse_dev *dev;
1662 int id;
1663
1664 idr_for_each_entry(&vduse_idr, dev, id)
1665 if (!strcmp(dev->name, name))
1666 return dev;
1667
1668 return NULL;
1669 }
1670
vduse_destroy_dev(char * name)1671 static int vduse_destroy_dev(char *name)
1672 {
1673 struct vduse_dev *dev = vduse_find_dev(name);
1674
1675 if (!dev)
1676 return -EINVAL;
1677
1678 mutex_lock(&dev->lock);
1679 if (dev->vdev || dev->connected) {
1680 mutex_unlock(&dev->lock);
1681 return -EBUSY;
1682 }
1683 dev->connected = true;
1684 mutex_unlock(&dev->lock);
1685
1686 vduse_dev_reset(dev);
1687 device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1688 idr_remove(&vduse_idr, dev->minor);
1689 kvfree(dev->config);
1690 vduse_dev_deinit_vqs(dev);
1691 if (dev->domain)
1692 vduse_domain_destroy(dev->domain);
1693 kfree(dev->name);
1694 vduse_dev_destroy(dev);
1695 module_put(THIS_MODULE);
1696
1697 return 0;
1698 }
1699
device_is_allowed(u32 device_id)1700 static bool device_is_allowed(u32 device_id)
1701 {
1702 int i;
1703
1704 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1705 if (allowed_device_id[i] == device_id)
1706 return true;
1707
1708 return false;
1709 }
1710
features_is_valid(struct vduse_dev_config * config)1711 static bool features_is_valid(struct vduse_dev_config *config)
1712 {
1713 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1714 return false;
1715
1716 /* Now we only support read-only configuration space */
1717 if ((config->device_id == VIRTIO_ID_BLOCK) &&
1718 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1719 return false;
1720 else if ((config->device_id == VIRTIO_ID_NET) &&
1721 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1722 return false;
1723
1724 if ((config->device_id == VIRTIO_ID_NET) &&
1725 !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
1726 return false;
1727
1728 return true;
1729 }
1730
vduse_validate_config(struct vduse_dev_config * config)1731 static bool vduse_validate_config(struct vduse_dev_config *config)
1732 {
1733 if (!is_mem_zero((const char *)config->reserved,
1734 sizeof(config->reserved)))
1735 return false;
1736
1737 if (config->vq_align > PAGE_SIZE)
1738 return false;
1739
1740 if (config->config_size > PAGE_SIZE)
1741 return false;
1742
1743 if (config->vq_num > 0xffff)
1744 return false;
1745
1746 if (!config->name[0])
1747 return false;
1748
1749 if (!device_is_allowed(config->device_id))
1750 return false;
1751
1752 if (!features_is_valid(config))
1753 return false;
1754
1755 return true;
1756 }
1757
msg_timeout_show(struct device * device,struct device_attribute * attr,char * buf)1758 static ssize_t msg_timeout_show(struct device *device,
1759 struct device_attribute *attr, char *buf)
1760 {
1761 struct vduse_dev *dev = dev_get_drvdata(device);
1762
1763 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1764 }
1765
msg_timeout_store(struct device * device,struct device_attribute * attr,const char * buf,size_t count)1766 static ssize_t msg_timeout_store(struct device *device,
1767 struct device_attribute *attr,
1768 const char *buf, size_t count)
1769 {
1770 struct vduse_dev *dev = dev_get_drvdata(device);
1771 int ret;
1772
1773 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1774 if (ret < 0)
1775 return ret;
1776
1777 return count;
1778 }
1779
1780 static DEVICE_ATTR_RW(msg_timeout);
1781
bounce_size_show(struct device * device,struct device_attribute * attr,char * buf)1782 static ssize_t bounce_size_show(struct device *device,
1783 struct device_attribute *attr, char *buf)
1784 {
1785 struct vduse_dev *dev = dev_get_drvdata(device);
1786
1787 return sysfs_emit(buf, "%u\n", dev->bounce_size);
1788 }
1789
bounce_size_store(struct device * device,struct device_attribute * attr,const char * buf,size_t count)1790 static ssize_t bounce_size_store(struct device *device,
1791 struct device_attribute *attr,
1792 const char *buf, size_t count)
1793 {
1794 struct vduse_dev *dev = dev_get_drvdata(device);
1795 unsigned int bounce_size;
1796 int ret;
1797
1798 ret = -EPERM;
1799 mutex_lock(&dev->domain_lock);
1800 if (dev->domain)
1801 goto unlock;
1802
1803 ret = kstrtouint(buf, 10, &bounce_size);
1804 if (ret < 0)
1805 goto unlock;
1806
1807 ret = -EINVAL;
1808 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1809 bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1810 goto unlock;
1811
1812 dev->bounce_size = bounce_size & PAGE_MASK;
1813 ret = count;
1814 unlock:
1815 mutex_unlock(&dev->domain_lock);
1816 return ret;
1817 }
1818
1819 static DEVICE_ATTR_RW(bounce_size);
1820
/*
 * NULL-terminated list of per-device sysfs attributes; exposed as
 * vduse_dev_groups and passed to device_create_with_groups().
 */
static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);
1828
/*
 * Create a VDUSE device from a validated configuration.
 *
 * @config: device configuration copied from userspace
 * @config_buf: config-space buffer; stored in dev->config. This function
 *              does not free it on failure.
 * @api_version: API version recorded in the device
 *
 * Allocates the device, reserves a minor in vduse_idr, creates the device
 * node with its sysfs groups, sets up the virtqueues and takes a module
 * reference.
 *
 * Returns 0 on success; -EPERM when creating a net device without
 * CAP_NET_ADMIN; -EEXIST if the name is already in use; -ENOMEM on
 * allocation failure; otherwise the error from idr_alloc(),
 * device_create_with_groups() or vduse_dev_init_vqs().
 */
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	struct vduse_dev *dev;
	int ret;

	if (config->device_id == VIRTIO_ID_NET && !capable(CAP_NET_ADMIN))
		return -EPERM;

	if (vduse_find_dev(config->name))
		return -EEXIST;

	dev = vduse_dev_create();
	if (!dev)
		return -ENOMEM;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name) {
		ret = -ENOMEM;
		goto err_str;
	}

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	/* Minor 0 is not handed out; ids start at 1. */
	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(&vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_vqs;

	__module_get(THIS_MODULE);

	return 0;

err_vqs:
	device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
	return ret;
}
1892
vduse_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1893 static long vduse_ioctl(struct file *file, unsigned int cmd,
1894 unsigned long arg)
1895 {
1896 int ret;
1897 void __user *argp = (void __user *)arg;
1898 struct vduse_control *control = file->private_data;
1899
1900 mutex_lock(&vduse_lock);
1901 switch (cmd) {
1902 case VDUSE_GET_API_VERSION:
1903 ret = put_user(control->api_version, (u64 __user *)argp);
1904 break;
1905 case VDUSE_SET_API_VERSION: {
1906 u64 api_version;
1907
1908 ret = -EFAULT;
1909 if (get_user(api_version, (u64 __user *)argp))
1910 break;
1911
1912 ret = -EINVAL;
1913 if (api_version > VDUSE_API_VERSION)
1914 break;
1915
1916 ret = 0;
1917 control->api_version = api_version;
1918 break;
1919 }
1920 case VDUSE_CREATE_DEV: {
1921 struct vduse_dev_config config;
1922 unsigned long size = offsetof(struct vduse_dev_config, config);
1923 void *buf;
1924
1925 ret = -EFAULT;
1926 if (copy_from_user(&config, argp, size))
1927 break;
1928
1929 ret = -EINVAL;
1930 if (vduse_validate_config(&config) == false)
1931 break;
1932
1933 buf = vmemdup_user(argp + size, config.config_size);
1934 if (IS_ERR(buf)) {
1935 ret = PTR_ERR(buf);
1936 break;
1937 }
1938 config.name[VDUSE_NAME_MAX - 1] = '\0';
1939 ret = vduse_create_dev(&config, buf, control->api_version);
1940 if (ret)
1941 kvfree(buf);
1942 break;
1943 }
1944 case VDUSE_DESTROY_DEV: {
1945 char name[VDUSE_NAME_MAX];
1946
1947 ret = -EFAULT;
1948 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1949 break;
1950
1951 name[VDUSE_NAME_MAX - 1] = '\0';
1952 ret = vduse_destroy_dev(name);
1953 break;
1954 }
1955 default:
1956 ret = -EINVAL;
1957 break;
1958 }
1959 mutex_unlock(&vduse_lock);
1960
1961 return ret;
1962 }
1963
vduse_release(struct inode * inode,struct file * file)1964 static int vduse_release(struct inode *inode, struct file *file)
1965 {
1966 struct vduse_control *control = file->private_data;
1967
1968 kfree(control);
1969 return 0;
1970 }
1971
vduse_open(struct inode * inode,struct file * file)1972 static int vduse_open(struct inode *inode, struct file *file)
1973 {
1974 struct vduse_control *control;
1975
1976 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1977 if (!control)
1978 return -ENOMEM;
1979
1980 control->api_version = VDUSE_API_VERSION;
1981 file->private_data = control;
1982
1983 return 0;
1984 }
1985
/* File operations installed on vduse_ctrl_cdev, i.e. /dev/vduse/control. */
static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};
1994
/*
 * The vdpa management device together with the struct device it is
 * registered on; freed by vduse_mgmtdev_release() through dev.release.
 */
struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

/* Single management device instance, allocated in vduse_mgmtdev_init(). */
static struct vduse_mgmt_dev *vduse_mgmt;
2001
vduse_dev_init_vdpa(struct vduse_dev * dev,const char * name)2002 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
2003 {
2004 struct vduse_vdpa *vdev;
2005 int ret;
2006
2007 if (dev->vdev)
2008 return -EEXIST;
2009
2010 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
2011 &vduse_vdpa_config_ops, 1, 1, name, true);
2012 if (IS_ERR(vdev))
2013 return PTR_ERR(vdev);
2014
2015 dev->vdev = vdev;
2016 vdev->dev = dev;
2017 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
2018 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
2019 if (ret) {
2020 put_device(&vdev->vdpa.dev);
2021 return ret;
2022 }
2023 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
2024 vdev->vdpa.dma_dev = &vdev->vdpa.dev;
2025 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
2026
2027 return 0;
2028 }
2029
vdpa_dev_add(struct vdpa_mgmt_dev * mdev,const char * name,const struct vdpa_dev_set_config * config)2030 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
2031 const struct vdpa_dev_set_config *config)
2032 {
2033 struct vduse_dev *dev;
2034 int ret;
2035
2036 mutex_lock(&vduse_lock);
2037 dev = vduse_find_dev(name);
2038 if (!dev || !vduse_dev_is_ready(dev)) {
2039 mutex_unlock(&vduse_lock);
2040 return -EINVAL;
2041 }
2042 ret = vduse_dev_init_vdpa(dev, name);
2043 mutex_unlock(&vduse_lock);
2044 if (ret)
2045 return ret;
2046
2047 mutex_lock(&dev->domain_lock);
2048 if (!dev->domain)
2049 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2050 dev->bounce_size);
2051 mutex_unlock(&dev->domain_lock);
2052 if (!dev->domain) {
2053 put_device(&dev->vdev->vdpa.dev);
2054 return -ENOMEM;
2055 }
2056
2057 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2058 if (ret) {
2059 put_device(&dev->vdev->vdpa.dev);
2060 mutex_lock(&dev->domain_lock);
2061 vduse_domain_destroy(dev->domain);
2062 dev->domain = NULL;
2063 mutex_unlock(&dev->domain_lock);
2064 return ret;
2065 }
2066
2067 return 0;
2068 }
2069
/* dev_del callback of the management device: unregisters the vdpa device. */
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}
2074
/* Management-device callbacks installed in vduse_mgmtdev_init(). */
static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};
2079
/*
 * Device ids advertised by the management device (block and net, any
 * vendor); the list ends with a zero entry.
 */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
2085
vduse_mgmtdev_release(struct device * dev)2086 static void vduse_mgmtdev_release(struct device *dev)
2087 {
2088 struct vduse_mgmt_dev *mgmt_dev;
2089
2090 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2091 kfree(mgmt_dev);
2092 }
2093
vduse_mgmtdev_init(void)2094 static int vduse_mgmtdev_init(void)
2095 {
2096 int ret;
2097
2098 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2099 if (!vduse_mgmt)
2100 return -ENOMEM;
2101
2102 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2103 if (ret) {
2104 kfree(vduse_mgmt);
2105 return ret;
2106 }
2107
2108 vduse_mgmt->dev.release = vduse_mgmtdev_release;
2109
2110 ret = device_register(&vduse_mgmt->dev);
2111 if (ret)
2112 goto dev_reg_err;
2113
2114 vduse_mgmt->mgmt_dev.id_table = id_table;
2115 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2116 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2117 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2118 if (ret)
2119 device_unregister(&vduse_mgmt->dev);
2120
2121 return ret;
2122
2123 dev_reg_err:
2124 put_device(&vduse_mgmt->dev);
2125 return ret;
2126 }
2127
/*
 * Undo vduse_mgmtdev_init(): unregister the vdpa management device first,
 * then the struct device it sits on.
 */
static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
2133
/*
 * Module init: registers the device class and the char device region, adds
 * the control and per-device cdevs, creates the two interrupt workqueues,
 * and initialises the IOVA domain code and the management device.
 *
 * On failure every step already completed is undone in reverse order via
 * the label ladder at the end.
 *
 * Returns 0 on success or a negative errno.
 */
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	ret = class_register(&vduse_class);
	if (ret)
		return ret;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	/* Minor 0 is the control node; devices use minors 1..VDUSE_DEV_MAX-1. */
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	/* Both workqueue allocations report failure as -ENOMEM. */
	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(&vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_unregister(&vduse_class);
	return ret;
}
module_init(vduse_init);
2206
/*
 * Module exit: tears down everything vduse_init() set up, in the reverse
 * order of initialisation.
 */
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
}
module_exit(vduse_exit);
2220
/* Module metadata; the values are the DRV_* macros defined at the top of the file. */
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
2224