1 /*
2  * vhost-user-blk sample application
3  *
4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
5  *
6  * Author:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10  * implementation by:
11  *  Felipe Franciosi <felipe@nutanix.com>
12  *  Anthony Liguori <aliguori@us.ibm.com>
13  *
14  * This work is licensed under the terms of the GNU GPL, version 2 only.
15  * See the COPYING file in the top-level directory.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "standard-headers/linux/virtio_blk.h"
20 #include "contrib/libvhost-user/libvhost-user-glib.h"
21 #include "contrib/libvhost-user/libvhost-user.h"
22 
23 #if defined(__linux__)
24 #include <linux/fs.h>
25 #include <sys/ioctl.h>
26 #endif
27 
/* Compile-time limits of this backend. */
enum {
    /* Number of virtqueues requested from vug_init() in main() */
    VHOST_USER_BLK_MAX_QUEUES = 8,
};
31 
/*
 * Device-writable trailer of a request. The request handler stores one of
 * the VIRTIO_BLK_S_* codes in it before completing the request.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
35 
/*
 * vhost user block device
 *
 * Callbacks receive the embedded VuDev and recover this structure with
 * container_of(), going through the VugDev stored in @parent.
 */
typedef struct VubDev {
    VugDev parent;
    /* descriptor of the backing file/device; -1 when none is open */
    int blk_fd;
    /* configuration space handed out by vub_get_config() */
    struct virtio_blk_config blkcfg;
    /* when true, VIRTIO_BLK_F_RO is added to the advertised features */
    bool enable_ro;
    /* path of the backing file; stored by pointer, not copied */
    char *blk_name;
    /* main loop run by main() and stopped from the panic callback */
    GMainLoop *loop;
} VubDev;
45 
/* Book-keeping for one request while it is being processed. */
typedef struct VubReq {
    /* element popped from the virtqueue; freed when the request completes */
    VuVirtqElement *elem;
    /* first sector of a read/write, taken from the request header */
    int64_t sector_num;
    /* payload size in bytes; completion reports this plus one status byte */
    size_t size;
    /* status trailer, located in the last device-writable segment */
    struct virtio_blk_inhdr *in;
    /* request header, located in the first driver-written segment */
    struct virtio_blk_outhdr *out;
    /* owning device */
    VubDev *vdev_blk;
    /* queue the element was popped from and is pushed back to */
    struct VuVirtq *vq;
} VubReq;
55 
/*
 * Total number of bytes described by an iovec array (refer util/iov.c).
 *
 * @iov:     array of segments; only the iov_len fields are read
 * @iov_cnt: number of entries in @iov; 0 yields 0 and @iov is not touched
 *
 * Returns the sum of all segment lengths.
 */
static size_t vub_iov_size(const struct iovec *iov,
                              const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining = iov_cnt;

    /* Walk the array back to front; the sum does not depend on the order */
    while (remaining > 0) {
        remaining--;
        total += iov[remaining].iov_len;
    }

    return total;
}
69 
/*
 * Flatten an iovec array into one contiguous buffer.
 *
 * @iov:     array of segments to copy from
 * @iov_cnt: number of entries in @iov
 * @buf:     destination; the caller must provide room for the sum of all
 *           segment lengths, no bound is checked here
 *
 * Returns the number of bytes copied.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte pointer: arithmetic on void * is a GNU extension, not ISO C */
    char *dst = buf;
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        /* Empty segments may carry a NULL base; never hand that to memcpy */
        if (iov[i].iov_len == 0) {
            continue;
        }
        memcpy(dst + len, iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }
    return len;
}
83 
84 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
85 {
86     VugDev *gdev;
87     VubDev *vdev_blk;
88 
89     assert(vu_dev);
90 
91     gdev = container_of(vu_dev, VugDev, parent);
92     vdev_blk = container_of(gdev, VubDev, parent);
93     if (buf) {
94         g_warning("vu_panic: %s", buf);
95     }
96 
97     g_main_loop_quit(vdev_blk->loop);
98 }
99 
100 static void vub_req_complete(VubReq *req)
101 {
102     VugDev *gdev = &req->vdev_blk->parent;
103     VuDev *vu_dev = &gdev->parent;
104 
105     /* IO size with 1 extra status byte */
106     vu_queue_push(vu_dev, req->vq, req->elem,
107                   req->size + 1);
108     vu_queue_notify(vu_dev, req->vq);
109 
110     if (req->elem) {
111         free(req->elem);
112     }
113 
114     g_free(req);
115 }
116 
117 static int vub_open(const char *file_name, bool wce)
118 {
119     int fd;
120     int flags = O_RDWR;
121 
122     if (!wce) {
123         flags |= O_DIRECT;
124     }
125 
126     fd = open(file_name, flags);
127     if (fd < 0) {
128         fprintf(stderr, "Cannot open file %s, %s\n", file_name,
129                 strerror(errno));
130         return -1;
131     }
132 
133     return fd;
134 }
135 
136 static ssize_t
137 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
138 {
139     VubDev *vdev_blk = req->vdev_blk;
140     ssize_t rc;
141 
142     if (!iovcnt) {
143         fprintf(stderr, "Invalid Read IOV count\n");
144         return -1;
145     }
146 
147     req->size = vub_iov_size(iov, iovcnt);
148     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
149     if (rc < 0) {
150         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
151                 vdev_blk->blk_name, req->sector_num, req->size,
152                 strerror(errno));
153         return -1;
154     }
155 
156     return rc;
157 }
158 
159 static ssize_t
160 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
161 {
162     VubDev *vdev_blk = req->vdev_blk;
163     ssize_t rc;
164 
165     if (!iovcnt) {
166         fprintf(stderr, "Invalid Write IOV count\n");
167         return -1;
168     }
169 
170     req->size = vub_iov_size(iov, iovcnt);
171     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
172     if (rc < 0) {
173         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
174                 vdev_blk->blk_name, req->sector_num, req->size,
175                 strerror(errno));
176         return -1;
177     }
178 
179     return rc;
180 }
181 
182 static int
183 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
184                          uint32_t type)
185 {
186     struct virtio_blk_discard_write_zeroes *desc;
187     ssize_t size;
188     void *buf;
189 
190     size = vub_iov_size(iov, iovcnt);
191     if (size != sizeof(*desc)) {
192         fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
193         return -1;
194     }
195     buf = g_new0(char, size);
196     vub_iov_to_buf(iov, iovcnt, buf);
197 
198     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
199     VubDev *vdev_blk = req->vdev_blk;
200     desc = (struct virtio_blk_discard_write_zeroes *)buf;
201     uint64_t range[2] = { le64toh(desc->sector) << 9,
202                           le32toh(desc->num_sectors) << 9 };
203     if (type == VIRTIO_BLK_T_DISCARD) {
204         if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
205             g_free(buf);
206             return 0;
207         }
208     } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
209         if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
210             g_free(buf);
211             return 0;
212         }
213     }
214     #endif
215 
216     g_free(buf);
217     return -1;
218 }
219 
220 static void
221 vub_flush(VubReq *req)
222 {
223     VubDev *vdev_blk = req->vdev_blk;
224 
225     fdatasync(vdev_blk->blk_fd);
226 }
227 
228 static int vub_virtio_process_req(VubDev *vdev_blk,
229                                      VuVirtq *vq)
230 {
231     VugDev *gdev = &vdev_blk->parent;
232     VuDev *vu_dev = &gdev->parent;
233     VuVirtqElement *elem;
234     uint32_t type;
235     unsigned in_num;
236     unsigned out_num;
237     VubReq *req;
238 
239     elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
240     if (!elem) {
241         return -1;
242     }
243 
244     /* refer to hw/block/virtio_blk.c */
245     if (elem->out_num < 1 || elem->in_num < 1) {
246         fprintf(stderr, "virtio-blk request missing headers\n");
247         free(elem);
248         return -1;
249     }
250 
251     req = g_new0(VubReq, 1);
252     req->vdev_blk = vdev_blk;
253     req->vq = vq;
254     req->elem = elem;
255 
256     in_num = elem->in_num;
257     out_num = elem->out_num;
258 
259     /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
260     if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
261         fprintf(stderr, "Invalid outhdr size\n");
262         goto err;
263     }
264     req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
265     out_num--;
266 
267     if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
268         fprintf(stderr, "Invalid inhdr size\n");
269         goto err;
270     }
271     req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
272     in_num--;
273 
274     type = le32toh(req->out->type);
275     switch (type & ~VIRTIO_BLK_T_BARRIER) {
276     case VIRTIO_BLK_T_IN:
277     case VIRTIO_BLK_T_OUT: {
278         ssize_t ret = 0;
279         bool is_write = type & VIRTIO_BLK_T_OUT;
280         req->sector_num = le64toh(req->out->sector);
281         if (is_write) {
282             ret  = vub_writev(req, &elem->out_sg[1], out_num);
283         } else {
284             ret = vub_readv(req, &elem->in_sg[0], in_num);
285         }
286         if (ret >= 0) {
287             req->in->status = VIRTIO_BLK_S_OK;
288         } else {
289             req->in->status = VIRTIO_BLK_S_IOERR;
290         }
291         vub_req_complete(req);
292         break;
293     }
294     case VIRTIO_BLK_T_FLUSH:
295         vub_flush(req);
296         req->in->status = VIRTIO_BLK_S_OK;
297         vub_req_complete(req);
298         break;
299     case VIRTIO_BLK_T_GET_ID: {
300         size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
301                           VIRTIO_BLK_ID_BYTES);
302         snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
303         req->in->status = VIRTIO_BLK_S_OK;
304         req->size = elem->in_sg[0].iov_len;
305         vub_req_complete(req);
306         break;
307     }
308     case VIRTIO_BLK_T_DISCARD:
309     case VIRTIO_BLK_T_WRITE_ZEROES: {
310         int rc;
311         rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
312         if (rc == 0) {
313             req->in->status = VIRTIO_BLK_S_OK;
314         } else {
315             req->in->status = VIRTIO_BLK_S_IOERR;
316         }
317         vub_req_complete(req);
318         break;
319     }
320     default:
321         req->in->status = VIRTIO_BLK_S_UNSUPP;
322         vub_req_complete(req);
323         break;
324     }
325 
326     return 0;
327 
328 err:
329     free(elem);
330     g_free(req);
331     return -1;
332 }
333 
334 static void vub_process_vq(VuDev *vu_dev, int idx)
335 {
336     VugDev *gdev;
337     VubDev *vdev_blk;
338     VuVirtq *vq;
339     int ret;
340 
341     gdev = container_of(vu_dev, VugDev, parent);
342     vdev_blk = container_of(gdev, VubDev, parent);
343     assert(vdev_blk);
344 
345     vq = vu_get_queue(vu_dev, idx);
346     assert(vq);
347 
348     while (1) {
349         ret = vub_virtio_process_req(vdev_blk, vq);
350         if (ret) {
351             break;
352         }
353     }
354 }
355 
356 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
357 {
358     VuVirtq *vq;
359 
360     assert(vu_dev);
361 
362     vq = vu_get_queue(vu_dev, idx);
363     vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
364 }
365 
366 static uint64_t
367 vub_get_features(VuDev *dev)
368 {
369     uint64_t features;
370     VugDev *gdev;
371     VubDev *vdev_blk;
372 
373     gdev = container_of(dev, VugDev, parent);
374     vdev_blk = container_of(gdev, VubDev, parent);
375 
376     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
377                1ull << VIRTIO_BLK_F_SEG_MAX |
378                1ull << VIRTIO_BLK_F_TOPOLOGY |
379                1ull << VIRTIO_BLK_F_BLK_SIZE |
380                1ull << VIRTIO_BLK_F_FLUSH |
381                #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
382                1ull << VIRTIO_BLK_F_DISCARD |
383                1ull << VIRTIO_BLK_F_WRITE_ZEROES |
384                #endif
385                1ull << VIRTIO_BLK_F_CONFIG_WCE;
386 
387     if (vdev_blk->enable_ro) {
388         features |= 1ull << VIRTIO_BLK_F_RO;
389     }
390 
391     return features;
392 }
393 
394 static uint64_t
395 vub_get_protocol_features(VuDev *dev)
396 {
397     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
398            1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
399 }
400 
401 static int
402 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
403 {
404     VugDev *gdev;
405     VubDev *vdev_blk;
406 
407     gdev = container_of(vu_dev, VugDev, parent);
408     vdev_blk = container_of(gdev, VubDev, parent);
409     memcpy(config, &vdev_blk->blkcfg, len);
410 
411     return 0;
412 }
413 
414 static int
415 vub_set_config(VuDev *vu_dev, const uint8_t *data,
416                uint32_t offset, uint32_t size, uint32_t flags)
417 {
418     VugDev *gdev;
419     VubDev *vdev_blk;
420     uint8_t wce;
421     int fd;
422 
423     /* don't support live migration */
424     if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
425         return -1;
426     }
427 
428     gdev = container_of(vu_dev, VugDev, parent);
429     vdev_blk = container_of(gdev, VubDev, parent);
430 
431     if (offset != offsetof(struct virtio_blk_config, wce) ||
432         size != 1) {
433         return -1;
434     }
435 
436     wce = *data;
437     if (wce == vdev_blk->blkcfg.wce) {
438         /* Do nothing as same with old configuration */
439         return 0;
440     }
441 
442     vdev_blk->blkcfg.wce = wce;
443     fprintf(stdout, "Write Cache Policy Changed\n");
444     if (vdev_blk->blk_fd >= 0) {
445         close(vdev_blk->blk_fd);
446         vdev_blk->blk_fd = -1;
447     }
448 
449     fd = vub_open(vdev_blk->blk_name, wce);
450     if (fd < 0) {
451         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
452         vdev_blk->blk_fd = -1;
453         return -1;
454     }
455     vdev_blk->blk_fd = fd;
456 
457     return 0;
458 }
459 
/* Device callbacks handed to vug_init() in main(). */
static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    /* installs or removes the per-queue request handler */
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    /* accepts only updates of the write cache enable byte */
    .set_config = vub_set_config,
};
467 
/*
 * Create a listening UNIX stream socket bound to @unix_fn.
 *
 * Any existing file at @unix_fn is unlinked first. A path that does not
 * fit into sockaddr_un.sun_path is rejected rather than truncated.
 *
 * Returns the listening descriptor, or -1 on error (reason is printed to
 * stderr).
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    /* Binding to a silently truncated path would use the wrong file */
    if (strlen(unix_fn) >= sizeof(un.sun_path)) {
        fprintf(stderr, "Socket path too long: %s\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    /* 0 is a valid descriptor; only a negative value signals failure */
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
504 
505 static void vub_free(struct VubDev *vdev_blk)
506 {
507     if (!vdev_blk) {
508         return;
509     }
510 
511     g_main_loop_unref(vdev_blk->loop);
512     if (vdev_blk->blk_fd >= 0) {
513         close(vdev_blk->blk_fd);
514     }
515     g_free(vdev_blk);
516 }
517 
/*
 * Query the block size of @fd with the BLKSSZGET ioctl where that is
 * available at build time.
 *
 * Returns the value of the local block size variable, which starts out as
 * 512 and is handed to the ioctl as its output argument.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t sector_size = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /* The same variable is returned whether or not the ioctl succeeds */
    (void)ioctl(fd, BLKSSZGET, &sector_size);
#endif

    return sector_size;
}
531 
532 static void
533 vub_initialize_config(int fd, struct virtio_blk_config *config)
534 {
535     off64_t capacity;
536 
537     capacity = lseek64(fd, 0, SEEK_END);
538     config->capacity = capacity >> 9;
539     config->blk_size = vub_get_blocksize(fd);
540     config->size_max = 65536;
541     config->seg_max = 128 - 2;
542     config->min_io_size = 1;
543     config->opt_io_size = 1;
544     config->num_queues = 1;
545     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
546     config->max_discard_sectors = 32768;
547     config->max_discard_seg = 1;
548     config->discard_sector_alignment = config->blk_size >> 9;
549     config->max_write_zeroes_sectors = 32768;
550     config->max_write_zeroes_seg = 1;
551     #endif
552 }
553 
554 static VubDev *
555 vub_new(char *blk_file)
556 {
557     VubDev *vdev_blk;
558 
559     vdev_blk = g_new0(VubDev, 1);
560     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
561     vdev_blk->blk_fd = vub_open(blk_file, 0);
562     if (vdev_blk->blk_fd  < 0) {
563         fprintf(stderr, "Error to open block device %s\n", blk_file);
564         vub_free(vdev_blk);
565         return NULL;
566     }
567     vdev_blk->enable_ro = false;
568     vdev_blk->blkcfg.wce = 0;
569     vdev_blk->blk_name = blk_file;
570 
571     /* fill virtio_blk_config with block parameters */
572     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
573 
574     return vdev_blk;
575 }
576 
577 static int opt_fdnum = -1;
578 static char *opt_socket_path;
579 static char *opt_blk_file;
580 static gboolean opt_print_caps;
581 static gboolean opt_read_only;
582 
583 static GOptionEntry entries[] = {
584     { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
585       "Print capabilities", NULL },
586     { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
587       "Use inherited fd socket", "FDNUM" },
588     { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
589       "Use UNIX socket path", "PATH" },
590     {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
591      "block device or file path", "PATH"},
592     { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
593       "Enable read-only", NULL }
594 };
595 
596 int main(int argc, char **argv)
597 {
598     int lsock = -1, csock = -1;
599     VubDev *vdev_blk = NULL;
600     GError *error = NULL;
601     GOptionContext *context;
602 
603     context = g_option_context_new(NULL);
604     g_option_context_add_main_entries(context, entries, NULL);
605     if (!g_option_context_parse(context, &argc, &argv, &error)) {
606         g_printerr("Option parsing failed: %s\n", error->message);
607         exit(EXIT_FAILURE);
608     }
609     if (opt_print_caps) {
610         g_print("{\n");
611         g_print("  \"type\": \"block\",\n");
612         g_print("  \"features\": [\n");
613         g_print("    \"read-only\",\n");
614         g_print("    \"blk-file\"\n");
615         g_print("  ]\n");
616         g_print("}\n");
617         exit(EXIT_SUCCESS);
618     }
619 
620     if (!opt_blk_file) {
621         g_print("%s\n", g_option_context_get_help(context, true, NULL));
622         exit(EXIT_FAILURE);
623     }
624 
625     if (opt_socket_path) {
626         lsock = unix_sock_new(opt_socket_path);
627         if (lsock < 0) {
628             exit(EXIT_FAILURE);
629         }
630     } else if (opt_fdnum < 0) {
631         g_print("%s\n", g_option_context_get_help(context, true, NULL));
632         exit(EXIT_FAILURE);
633     } else {
634         lsock = opt_fdnum;
635     }
636 
637     csock = accept(lsock, NULL, NULL);
638     if (csock < 0) {
639         g_printerr("Accept error %s\n", strerror(errno));
640         exit(EXIT_FAILURE);
641     }
642 
643     vdev_blk = vub_new(opt_blk_file);
644     if (!vdev_blk) {
645         exit(EXIT_FAILURE);
646     }
647     if (opt_read_only) {
648         vdev_blk->enable_ro = true;
649     }
650 
651     if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
652                   vub_panic_cb, &vub_iface)) {
653         g_printerr("Failed to initialize libvhost-user-glib\n");
654         exit(EXIT_FAILURE);
655     }
656 
657     g_main_loop_run(vdev_blk->loop);
658     g_main_loop_unref(vdev_blk->loop);
659     g_option_context_free(context);
660     vug_deinit(&vdev_blk->parent);
661     vub_free(vdev_blk);
662     if (csock >= 0) {
663         close(csock);
664     }
665     if (lsock >= 0) {
666         close(lsock);
667     }
668     g_free(opt_socket_path);
669     g_free(opt_blk_file);
670 
671     return 0;
672 }
673