1 /*
2  * vhost-user-blk sample application
3  *
4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
5  *
6  * Author:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10  * implementation by:
11  *  Felipe Franciosi <felipe@nutanix.com>
12  *  Anthony Liguori <aliguori@us.ibm.com>
13  *
14  * This work is licensed under the terms of the GNU GPL, version 2 only.
15  * See the COPYING file in the top-level directory.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "standard-headers/linux/virtio_blk.h"
20 #include "contrib/libvhost-user/libvhost-user-glib.h"
21 #include "contrib/libvhost-user/libvhost-user.h"
22 
23 #include <glib.h>
24 
/*
 * Device-writable trailer of a virtio-blk request.
 *
 * It holds a single status byte which the request handler fills in with
 * one of the VIRTIO_BLK_S_* codes before the request is completed.
 */
struct virtio_blk_inhdr {
    unsigned char status;   /* VIRTIO_BLK_S_OK / _IOERR / _UNSUPP */
};
28 
29 /* vhost user block device */
30 typedef struct VubDev {
31     VugDev parent;
32     int blk_fd;
33     struct virtio_blk_config blkcfg;
34     bool enable_ro;
35     char *blk_name;
36     GMainLoop *loop;
37 } VubDev;
38 
39 typedef struct VubReq {
40     VuVirtqElement *elem;
41     int64_t sector_num;
42     size_t size;
43     struct virtio_blk_inhdr *in;
44     struct virtio_blk_outhdr *out;
45     VubDev *vdev_blk;
46     struct VuVirtq *vq;
47 } VubReq;
48 
/*
 * Total number of bytes described by an iovec array (cf. util/iov.c).
 *
 * @iov:     array of segments; not dereferenced when @iov_cnt is 0
 * @iov_cnt: number of entries in @iov
 *
 * Returns the sum of the iov_len fields of all @iov_cnt entries.
 */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining = iov_cnt;

    /* Walk the array back to front; the sum does not depend on order. */
    while (remaining > 0) {
        remaining--;
        total += iov[remaining].iov_len;
    }

    return total;
}
62 
63 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
64 {
65     VugDev *gdev;
66     VubDev *vdev_blk;
67 
68     assert(vu_dev);
69 
70     gdev = container_of(vu_dev, VugDev, parent);
71     vdev_blk = container_of(gdev, VubDev, parent);
72     if (buf) {
73         g_warning("vu_panic: %s", buf);
74     }
75 
76     g_main_loop_quit(vdev_blk->loop);
77 }
78 
79 static void vub_req_complete(VubReq *req)
80 {
81     VugDev *gdev = &req->vdev_blk->parent;
82     VuDev *vu_dev = &gdev->parent;
83 
84     /* IO size with 1 extra status byte */
85     vu_queue_push(vu_dev, req->vq, req->elem,
86                   req->size + 1);
87     vu_queue_notify(vu_dev, req->vq);
88 
89     if (req->elem) {
90         free(req->elem);
91     }
92 
93     g_free(req);
94 }
95 
96 static int vub_open(const char *file_name, bool wce)
97 {
98     int fd;
99     int flags = O_RDWR;
100 
101     if (!wce) {
102         flags |= O_DIRECT;
103     }
104 
105     fd = open(file_name, flags);
106     if (fd < 0) {
107         fprintf(stderr, "Cannot open file %s, %s\n", file_name,
108                 strerror(errno));
109         return -1;
110     }
111 
112     return fd;
113 }
114 
115 static ssize_t
116 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
117 {
118     VubDev *vdev_blk = req->vdev_blk;
119     ssize_t rc;
120 
121     if (!iovcnt) {
122         fprintf(stderr, "Invalid Read IOV count\n");
123         return -1;
124     }
125 
126     req->size = vub_iov_size(iov, iovcnt);
127     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
128     if (rc < 0) {
129         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
130                 vdev_blk->blk_name, req->sector_num, req->size,
131                 strerror(errno));
132         return -1;
133     }
134 
135     return rc;
136 }
137 
138 static ssize_t
139 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
140 {
141     VubDev *vdev_blk = req->vdev_blk;
142     ssize_t rc;
143 
144     if (!iovcnt) {
145         fprintf(stderr, "Invalid Write IOV count\n");
146         return -1;
147     }
148 
149     req->size = vub_iov_size(iov, iovcnt);
150     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
151     if (rc < 0) {
152         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
153                 vdev_blk->blk_name, req->sector_num, req->size,
154                 strerror(errno));
155         return -1;
156     }
157 
158     return rc;
159 }
160 
161 static void
162 vub_flush(VubReq *req)
163 {
164     VubDev *vdev_blk = req->vdev_blk;
165 
166     fdatasync(vdev_blk->blk_fd);
167 }
168 
169 static int vub_virtio_process_req(VubDev *vdev_blk,
170                                      VuVirtq *vq)
171 {
172     VugDev *gdev = &vdev_blk->parent;
173     VuDev *vu_dev = &gdev->parent;
174     VuVirtqElement *elem;
175     uint32_t type;
176     unsigned in_num;
177     unsigned out_num;
178     VubReq *req;
179 
180     elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
181     if (!elem) {
182         return -1;
183     }
184 
185     /* refer to hw/block/virtio_blk.c */
186     if (elem->out_num < 1 || elem->in_num < 1) {
187         fprintf(stderr, "virtio-blk request missing headers\n");
188         free(elem);
189         return -1;
190     }
191 
192     req = g_new0(VubReq, 1);
193     req->vdev_blk = vdev_blk;
194     req->vq = vq;
195     req->elem = elem;
196 
197     in_num = elem->in_num;
198     out_num = elem->out_num;
199 
200     /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
201     if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
202         fprintf(stderr, "Invalid outhdr size\n");
203         goto err;
204     }
205     req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
206     out_num--;
207 
208     if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
209         fprintf(stderr, "Invalid inhdr size\n");
210         goto err;
211     }
212     req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
213     in_num--;
214 
215     type = le32toh(req->out->type);
216     switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
217         case VIRTIO_BLK_T_IN: {
218             ssize_t ret = 0;
219             bool is_write = type & VIRTIO_BLK_T_OUT;
220             req->sector_num = le64toh(req->out->sector);
221             if (is_write) {
222                 ret  = vub_writev(req, &elem->out_sg[1], out_num);
223             } else {
224                 ret = vub_readv(req, &elem->in_sg[0], in_num);
225             }
226             if (ret >= 0) {
227                 req->in->status = VIRTIO_BLK_S_OK;
228             } else {
229                 req->in->status = VIRTIO_BLK_S_IOERR;
230             }
231             vub_req_complete(req);
232             break;
233         }
234         case VIRTIO_BLK_T_FLUSH: {
235             vub_flush(req);
236             req->in->status = VIRTIO_BLK_S_OK;
237             vub_req_complete(req);
238             break;
239         }
240         case VIRTIO_BLK_T_GET_ID: {
241             size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
242                               VIRTIO_BLK_ID_BYTES);
243             snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
244             req->in->status = VIRTIO_BLK_S_OK;
245             req->size = elem->in_sg[0].iov_len;
246             vub_req_complete(req);
247             break;
248         }
249         default: {
250             req->in->status = VIRTIO_BLK_S_UNSUPP;
251             vub_req_complete(req);
252             break;
253         }
254     }
255 
256     return 0;
257 
258 err:
259     free(elem);
260     g_free(req);
261     return -1;
262 }
263 
264 static void vub_process_vq(VuDev *vu_dev, int idx)
265 {
266     VugDev *gdev;
267     VubDev *vdev_blk;
268     VuVirtq *vq;
269     int ret;
270 
271     if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
272         fprintf(stderr, "VQ Index out of range: %d\n", idx);
273         vub_panic_cb(vu_dev, NULL);
274         return;
275     }
276 
277     gdev = container_of(vu_dev, VugDev, parent);
278     vdev_blk = container_of(gdev, VubDev, parent);
279     assert(vdev_blk);
280 
281     vq = vu_get_queue(vu_dev, idx);
282     assert(vq);
283 
284     while (1) {
285         ret = vub_virtio_process_req(vdev_blk, vq);
286         if (ret) {
287             break;
288         }
289     }
290 }
291 
292 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
293 {
294     VuVirtq *vq;
295 
296     assert(vu_dev);
297 
298     vq = vu_get_queue(vu_dev, idx);
299     vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
300 }
301 
302 static uint64_t
303 vub_get_features(VuDev *dev)
304 {
305     uint64_t features;
306     VugDev *gdev;
307     VubDev *vdev_blk;
308 
309     gdev = container_of(dev, VugDev, parent);
310     vdev_blk = container_of(gdev, VubDev, parent);
311 
312     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
313                1ull << VIRTIO_BLK_F_SEG_MAX |
314                1ull << VIRTIO_BLK_F_TOPOLOGY |
315                1ull << VIRTIO_BLK_F_BLK_SIZE |
316                1ull << VIRTIO_BLK_F_FLUSH |
317                1ull << VIRTIO_BLK_F_CONFIG_WCE |
318                1ull << VIRTIO_F_VERSION_1 |
319                1ull << VHOST_USER_F_PROTOCOL_FEATURES;
320 
321     if (vdev_blk->enable_ro) {
322         features |= 1ull << VIRTIO_BLK_F_RO;
323     }
324 
325     return features;
326 }
327 
328 static uint64_t
329 vub_get_protocol_features(VuDev *dev)
330 {
331     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
332 }
333 
334 static int
335 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
336 {
337     VugDev *gdev;
338     VubDev *vdev_blk;
339 
340     gdev = container_of(vu_dev, VugDev, parent);
341     vdev_blk = container_of(gdev, VubDev, parent);
342     memcpy(config, &vdev_blk->blkcfg, len);
343 
344     return 0;
345 }
346 
347 static int
348 vub_set_config(VuDev *vu_dev, const uint8_t *data,
349                uint32_t offset, uint32_t size, uint32_t flags)
350 {
351     VugDev *gdev;
352     VubDev *vdev_blk;
353     uint8_t wce;
354     int fd;
355 
356     /* don't support live migration */
357     if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
358         return -1;
359     }
360 
361     gdev = container_of(vu_dev, VugDev, parent);
362     vdev_blk = container_of(gdev, VubDev, parent);
363 
364     if (offset != offsetof(struct virtio_blk_config, wce) ||
365         size != 1) {
366         return -1;
367     }
368 
369     wce = *data;
370     if (wce == vdev_blk->blkcfg.wce) {
371         /* Do nothing as same with old configuration */
372         return 0;
373     }
374 
375     vdev_blk->blkcfg.wce = wce;
376     fprintf(stdout, "Write Cache Policy Changed\n");
377     if (vdev_blk->blk_fd >= 0) {
378         close(vdev_blk->blk_fd);
379         vdev_blk->blk_fd = -1;
380     }
381 
382     fd = vub_open(vdev_blk->blk_name, wce);
383     if (fd < 0) {
384         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
385         vdev_blk->blk_fd = -1;
386         return -1;
387     }
388     vdev_blk->blk_fd = fd;
389 
390     return 0;
391 }
392 
393 static const VuDevIface vub_iface = {
394     .get_features = vub_get_features,
395     .queue_set_started = vub_queue_set_started,
396     .get_protocol_features = vub_get_protocol_features,
397     .get_config = vub_get_config,
398     .set_config = vub_set_config,
399 };
400 
/*
 * Create a listening UNIX stream socket bound to @unix_fn.
 *
 * Any existing file at @unix_fn is unlinked first. Paths that do not fit
 * into sockaddr_un.sun_path are rejected instead of being silently
 * truncated (which would bind a different path than the one requested).
 *
 * Returns the listening descriptor, or -1 on failure with the reason
 * printed to stderr.
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;

    assert(unix_fn);

    if (strlen(unix_fn) >= sizeof(un.sun_path)) {
        fprintf(stderr, "UNIX socket path too long: %s\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    /* 0 is a valid descriptor; only negative values signal failure */
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
437 
438 static void vub_free(struct VubDev *vdev_blk)
439 {
440     if (!vdev_blk) {
441         return;
442     }
443 
444     g_main_loop_unref(vdev_blk->loop);
445     if (vdev_blk->blk_fd >= 0) {
446         close(vdev_blk->blk_fd);
447     }
448     g_free(vdev_blk);
449 }
450 
/*
 * Logical block size of the device behind @fd.
 *
 * On Linux builds where BLKSSZGET is defined (it comes from
 * <linux/fs.h>) the kernel is asked for the sector size. If the ioctl
 * fails, e.g. because @fd is a regular file, or is not compiled in, the
 * default of 512 bytes is returned.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;
    }
#endif

    return blocksize;
}
464 
465 static void
466 vub_initialize_config(int fd, struct virtio_blk_config *config)
467 {
468     off64_t capacity;
469 
470     capacity = lseek64(fd, 0, SEEK_END);
471     config->capacity = capacity >> 9;
472     config->blk_size = vub_get_blocksize(fd);
473     config->size_max = 65536;
474     config->seg_max = 128 - 2;
475     config->min_io_size = 1;
476     config->opt_io_size = 1;
477     config->num_queues = 1;
478 }
479 
480 static VubDev *
481 vub_new(char *blk_file)
482 {
483     VubDev *vdev_blk;
484 
485     vdev_blk = g_new0(VubDev, 1);
486     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
487     vdev_blk->blk_fd = vub_open(blk_file, 0);
488     if (vdev_blk->blk_fd  < 0) {
489         fprintf(stderr, "Error to open block device %s\n", blk_file);
490         vub_free(vdev_blk);
491         return NULL;
492     }
493     vdev_blk->enable_ro = false;
494     vdev_blk->blkcfg.wce = 0;
495     vdev_blk->blk_name = blk_file;
496 
497     /* fill virtio_blk_config with block parameters */
498     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
499 
500     return vdev_blk;
501 }
502 
503 int main(int argc, char **argv)
504 {
505     int opt;
506     char *unix_socket = NULL;
507     char *blk_file = NULL;
508     bool enable_ro = false;
509     int lsock = -1, csock = -1;
510     VubDev *vdev_blk = NULL;
511 
512     while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
513         switch (opt) {
514         case 'b':
515             blk_file = g_strdup(optarg);
516             break;
517         case 's':
518             unix_socket = g_strdup(optarg);
519             break;
520         case 'r':
521             enable_ro = true;
522             break;
523         case 'h':
524         default:
525             printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
526                    " | -r Enable read-only ] | [ -h ]\n", argv[0]);
527             return 0;
528         }
529     }
530 
531     if (!unix_socket || !blk_file) {
532         printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
533                " | -r Enable read-only ] | [ -h ]\n", argv[0]);
534         return -1;
535     }
536 
537     lsock = unix_sock_new(unix_socket);
538     if (lsock < 0) {
539         goto err;
540     }
541 
542     csock = accept(lsock, (void *)0, (void *)0);
543     if (csock < 0) {
544         fprintf(stderr, "Accept error %s\n", strerror(errno));
545         goto err;
546     }
547 
548     vdev_blk = vub_new(blk_file);
549     if (!vdev_blk) {
550         goto err;
551     }
552     if (enable_ro) {
553         vdev_blk->enable_ro = true;
554     }
555 
556     vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
557 
558     g_main_loop_run(vdev_blk->loop);
559 
560     vug_deinit(&vdev_blk->parent);
561 
562 err:
563     vub_free(vdev_blk);
564     if (csock >= 0) {
565         close(csock);
566     }
567     if (lsock >= 0) {
568         close(lsock);
569     }
570     g_free(unix_socket);
571     g_free(blk_file);
572 
573     return 0;
574 }
575