xref: /qemu/hw/block/xen-block.c (revision ec6f3fc3)
1 /*
2  * Copyright (c) 2018  Citrix Systems Inc.
3  *
4  * This work is licensed under the terms of the GNU GPL, version 2 or later.
5  * See the COPYING file in the top-level directory.
6  */
7 
8 #include "qemu/osdep.h"
9 #include "qemu/cutils.h"
10 #include "qemu/main-loop.h"
11 #include "qemu/module.h"
12 #include "qemu/option.h"
13 #include "qapi/error.h"
14 #include "qapi/qapi-commands-block-core.h"
15 #include "qapi/qapi-commands-qom.h"
16 #include "qapi/qapi-visit-block-core.h"
17 #include "qapi/qobject-input-visitor.h"
18 #include "qapi/visitor.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qstring.h"
21 #include "qom/object_interfaces.h"
22 #include "hw/block/xen_blkif.h"
23 #include "hw/qdev-properties.h"
24 #include "hw/xen/xen-block.h"
25 #include "hw/xen/xen-backend.h"
26 #include "sysemu/blockdev.h"
27 #include "sysemu/block-backend.h"
28 #include "sysemu/iothread.h"
29 #include "dataplane/xen-block.h"
30 #include "hw/xen/interface/io/xs_wire.h"
31 #include "trace.h"
32 
/*
 * Major numbers used when packing a Xen virtual block device number.
 * For the legacy schemes the major sits in bits 8 and up of the number.
 * The extended xvd scheme is recognised by a "major" (number >> 8) that
 * falls anywhere in the XVDQ_MAJOR ... XVDBGQCV_MAJOR range.
 */
#define XVDA_MAJOR 202
#define XVDQ_MAJOR (1 << 20)
#define XVDBGQCV_MAJOR ((1 << 21) - 1)
#define HDA_MAJOR 3
#define HDC_MAJOR 22
#define SDA_MAJOR 8


/*
 * Extract the disk index from an encoded virtual device number.
 *
 * @vdev_nr: device number as written by xen_block_set_vdev() /
 *           xen_block_find_free_vdev().
 *
 * Returns the zero-based disk index, or -1 if the major is not one of
 * the recognised encodings.
 */
static int vdev_to_diskno(unsigned int vdev_nr)
{
    switch (vdev_nr >> 8) {
    case XVDA_MAJOR:
    case SDA_MAJOR:
        /*
         * xvd (legacy) and sd numbers are (major << 8) | (disk << 4) |
         * partition with disk < 16, so the disk is the 4-bit field at
         * bits 4-7.
         */
        return (vdev_nr >> 4) & 0xf;

    case HDA_MAJOR:
        /* hda/hdb: a single disk bit at bit 6. */
        return (vdev_nr >> 6) & 1;

    case HDC_MAJOR:
        /* hdc/hdd share the hd layout but are disks 2 and 3. */
        return ((vdev_nr >> 6) & 1) + 2;

    case XVDQ_MAJOR ... XVDBGQCV_MAJOR:
        /* Extended xvd: 20-bit disk index at bits 8-27. */
        return (vdev_nr >> 8) & 0xfffff;

    default:
        return -1;
    }
}
61 
62 #define MAX_AUTO_VDEV 4096
63 
64 /*
65  * Find a free device name in the xvda → xvdfan range and set it in
66  * blockdev->props.vdev. Our definition of "free" is that there must
67  * be no other disk or partition with the same disk number.
68  *
69  * You are technically permitted to have all of hda, hda1, sda, sda1,
70  * xvda and xvda1 as *separate* PV block devices with separate backing
71  * stores. That doesn't make it a good idea. This code will skip xvda
72  * if *any* of those "conflicting" devices already exists.
73  *
74  * The limit of xvdfan (disk 4095) is fairly arbitrary just to avoid a
75  * stupidly sized bitmap, but Linux as of v6.6 doesn't support anything
76  * higher than that anyway.
77  */
78 static bool xen_block_find_free_vdev(XenBlockDevice *blockdev, Error **errp)
79 {
80     XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(blockdev)));
81     unsigned long used_devs[BITS_TO_LONGS(MAX_AUTO_VDEV)];
82     XenBlockVdev *vdev = &blockdev->props.vdev;
83     char fe_path[XENSTORE_ABS_PATH_MAX + 1];
84     char **existing_frontends;
85     unsigned int nr_existing = 0;
86     unsigned int vdev_nr;
87     int i, disk = 0;
88 
89     snprintf(fe_path, sizeof(fe_path), "/local/domain/%u/device/vbd",
90              blockdev->xendev.frontend_id);
91 
92     existing_frontends = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, fe_path,
93                                                &nr_existing);
94     if (!existing_frontends && errno != ENOENT) {
95         error_setg_errno(errp, errno, "cannot read %s", fe_path);
96         return false;
97     }
98 
99     memset(used_devs, 0, sizeof(used_devs));
100     for (i = 0; i < nr_existing; i++) {
101         if (qemu_strtoui(existing_frontends[i], NULL, 10, &vdev_nr)) {
102             free(existing_frontends[i]);
103             continue;
104         }
105 
106         free(existing_frontends[i]);
107 
108         disk = vdev_to_diskno(vdev_nr);
109         if (disk < 0 || disk >= MAX_AUTO_VDEV) {
110             continue;
111         }
112 
113         set_bit(disk, used_devs);
114     }
115     free(existing_frontends);
116 
117     disk = find_first_zero_bit(used_devs, MAX_AUTO_VDEV);
118     if (disk == MAX_AUTO_VDEV) {
119         error_setg(errp, "cannot find device vdev for block device");
120         return false;
121     }
122 
123     vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
124     vdev->partition = 0;
125     vdev->disk = disk;
126     if (disk < (1 << 4)) {
127         vdev->number = (XVDA_MAJOR << 8) | (disk << 4);
128     } else {
129         vdev->number = (XVDQ_MAJOR << 8) | (disk << 8);
130     }
131     return true;
132 }
133 
134 static char *xen_block_get_name(XenDevice *xendev, Error **errp)
135 {
136     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
137     XenBlockVdev *vdev = &blockdev->props.vdev;
138 
139     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID &&
140         !xen_block_find_free_vdev(blockdev, errp)) {
141         return NULL;
142     }
143     return g_strdup_printf("%lu", vdev->number);
144 }
145 
146 static void xen_block_disconnect(XenDevice *xendev, Error **errp)
147 {
148     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
149     const char *type = object_get_typename(OBJECT(blockdev));
150     XenBlockVdev *vdev = &blockdev->props.vdev;
151 
152     trace_xen_block_disconnect(type, vdev->disk, vdev->partition);
153 
154     xen_block_dataplane_stop(blockdev->dataplane);
155 }
156 
157 static void xen_block_connect(XenDevice *xendev, Error **errp)
158 {
159     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
160     const char *type = object_get_typename(OBJECT(blockdev));
161     XenBlockVdev *vdev = &blockdev->props.vdev;
162     BlockConf *conf = &blockdev->props.conf;
163     unsigned int feature_large_sector_size;
164     unsigned int order, nr_ring_ref, *ring_ref, event_channel, protocol;
165     char *str;
166 
167     trace_xen_block_connect(type, vdev->disk, vdev->partition);
168 
169     if (xen_device_frontend_scanf(xendev, "feature-large-sector-size", "%u",
170                                   &feature_large_sector_size) != 1) {
171         feature_large_sector_size = 0;
172     }
173 
174     if (feature_large_sector_size != 1 &&
175         conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) {
176         error_setg(errp, "logical_block_size != %u not supported by frontend",
177                    XEN_BLKIF_SECTOR_SIZE);
178         return;
179     }
180 
181     if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u",
182                                   &order) != 1) {
183         nr_ring_ref = 1;
184         ring_ref = g_new(unsigned int, nr_ring_ref);
185 
186         if (xen_device_frontend_scanf(xendev, "ring-ref", "%u",
187                                       &ring_ref[0]) != 1) {
188             error_setg(errp, "failed to read ring-ref");
189             g_free(ring_ref);
190             return;
191         }
192     } else if (qemu_xen_gnttab_can_map_multi() &&
193                order <= blockdev->props.max_ring_page_order) {
194         unsigned int i;
195 
196         nr_ring_ref = 1 << order;
197         ring_ref = g_new(unsigned int, nr_ring_ref);
198 
199         for (i = 0; i < nr_ring_ref; i++) {
200             const char *key = g_strdup_printf("ring-ref%u", i);
201 
202             if (xen_device_frontend_scanf(xendev, key, "%u",
203                                           &ring_ref[i]) != 1) {
204                 error_setg(errp, "failed to read %s", key);
205                 g_free((gpointer)key);
206                 g_free(ring_ref);
207                 return;
208             }
209 
210             g_free((gpointer)key);
211         }
212     } else {
213         error_setg(errp, "invalid ring-page-order (%d)", order);
214         return;
215     }
216 
217     if (xen_device_frontend_scanf(xendev, "event-channel", "%u",
218                                   &event_channel) != 1) {
219         error_setg(errp, "failed to read event-channel");
220         g_free(ring_ref);
221         return;
222     }
223 
224     if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) {
225         /* x86 defaults to the 32-bit protocol even for 64-bit guests. */
226         if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) {
227             protocol = BLKIF_PROTOCOL_X86_32;
228         } else {
229             protocol = BLKIF_PROTOCOL_NATIVE;
230         }
231     } else {
232         if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) {
233             protocol = BLKIF_PROTOCOL_X86_32;
234         } else if (strcmp(str, XEN_IO_PROTO_ABI_X86_64) == 0) {
235             protocol = BLKIF_PROTOCOL_X86_64;
236         } else {
237             protocol = BLKIF_PROTOCOL_NATIVE;
238         }
239 
240         free(str);
241     }
242 
243     xen_block_dataplane_start(blockdev->dataplane, ring_ref, nr_ring_ref,
244                               event_channel, protocol, errp);
245 
246     g_free(ring_ref);
247 }
248 
249 static void xen_block_unrealize(XenDevice *xendev)
250 {
251     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
252     XenBlockDeviceClass *blockdev_class =
253         XEN_BLOCK_DEVICE_GET_CLASS(xendev);
254     const char *type = object_get_typename(OBJECT(blockdev));
255     XenBlockVdev *vdev = &blockdev->props.vdev;
256 
257     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
258         return;
259     }
260 
261     trace_xen_block_unrealize(type, vdev->disk, vdev->partition);
262 
263     /* Disconnect from the frontend in case this has not already happened */
264     xen_block_disconnect(xendev, NULL);
265 
266     xen_block_dataplane_destroy(blockdev->dataplane);
267     blockdev->dataplane = NULL;
268 
269     if (blockdev_class->unrealize) {
270         blockdev_class->unrealize(blockdev);
271     }
272 }
273 
274 static void xen_block_set_size(XenBlockDevice *blockdev)
275 {
276     const char *type = object_get_typename(OBJECT(blockdev));
277     XenBlockVdev *vdev = &blockdev->props.vdev;
278     BlockConf *conf = &blockdev->props.conf;
279     int64_t sectors = blk_getlength(conf->blk) / conf->logical_block_size;
280     XenDevice *xendev = XEN_DEVICE(blockdev);
281 
282     trace_xen_block_size(type, vdev->disk, vdev->partition, sectors);
283 
284     xen_device_backend_printf(xendev, "sectors", "%"PRIi64, sectors);
285 }
286 
287 static void xen_block_resize_cb(void *opaque)
288 {
289     XenBlockDevice *blockdev = opaque;
290     XenDevice *xendev = XEN_DEVICE(blockdev);
291     enum xenbus_state state = xen_device_backend_get_state(xendev);
292 
293     xen_block_set_size(blockdev);
294 
295     /*
296      * Mimic the behaviour of Linux xen-blkback and re-write the state
297      * to trigger the frontend watch.
298      */
299     xen_device_backend_printf(xendev, "state", "%u", state);
300 }
301 
302 /* Suspend request handling */
303 static void xen_block_drained_begin(void *opaque)
304 {
305     XenBlockDevice *blockdev = opaque;
306 
307     xen_block_dataplane_detach(blockdev->dataplane);
308 }
309 
310 /* Resume request handling */
311 static void xen_block_drained_end(void *opaque)
312 {
313     XenBlockDevice *blockdev = opaque;
314 
315     xen_block_dataplane_attach(blockdev->dataplane);
316 }
317 
/*
 * Block backend callbacks for Xen block devices; installed on the
 * BlockBackend with blk_set_dev_ops() at the end of xen_block_realize().
 */
static const BlockDevOps xen_block_dev_ops = {
    .resize_cb     = xen_block_resize_cb,
    .drained_begin = xen_block_drained_begin,
    .drained_end   = xen_block_drained_end,
};
323 
324 static void xen_block_realize(XenDevice *xendev, Error **errp)
325 {
326     ERRP_GUARD();
327     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
328     XenBlockDeviceClass *blockdev_class =
329         XEN_BLOCK_DEVICE_GET_CLASS(xendev);
330     const char *type = object_get_typename(OBJECT(blockdev));
331     XenBlockVdev *vdev = &blockdev->props.vdev;
332     BlockConf *conf = &blockdev->props.conf;
333     BlockBackend *blk = conf->blk;
334 
335     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
336         error_setg(errp, "vdev property not set");
337         return;
338     }
339 
340     trace_xen_block_realize(type, vdev->disk, vdev->partition);
341 
342     if (blockdev_class->realize) {
343         blockdev_class->realize(blockdev, errp);
344         if (*errp) {
345             return;
346         }
347     }
348 
349     /*
350      * The blkif protocol does not deal with removable media, so it must
351      * always be present, even for CDRom devices.
352      */
353     assert(blk);
354     if (!blk_is_inserted(blk)) {
355         error_setg(errp, "device needs media, but drive is empty");
356         return;
357     }
358 
359     if (!blkconf_apply_backend_options(conf, blockdev->info & VDISK_READONLY,
360                                        true, errp)) {
361         return;
362     }
363 
364     if (!(blockdev->info & VDISK_CDROM) &&
365         !blkconf_geometry(conf, NULL, 65535, 255, 255, errp)) {
366         return;
367     }
368 
369     if (!blkconf_blocksizes(conf, errp)) {
370         return;
371     }
372 
373     if (conf->discard_granularity == -1) {
374         conf->discard_granularity = conf->physical_block_size;
375     }
376 
377     if (blk_get_flags(blk) & BDRV_O_UNMAP) {
378         xen_device_backend_printf(xendev, "feature-discard", "%u", 1);
379         xen_device_backend_printf(xendev, "discard-granularity", "%u",
380                                   conf->discard_granularity);
381         xen_device_backend_printf(xendev, "discard-alignment", "%u", 0);
382     }
383 
384     xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
385 
386     if (qemu_xen_gnttab_can_map_multi()) {
387         xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
388                                   blockdev->props.max_ring_page_order);
389     }
390 
391     xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
392 
393     xen_device_frontend_printf(xendev, "virtual-device", "%lu",
394                                vdev->number);
395     xen_device_frontend_printf(xendev, "device-type", "%s",
396                                blockdev->device_type);
397 
398     xen_device_backend_printf(xendev, "sector-size", "%u",
399                               conf->logical_block_size);
400 
401     xen_block_set_size(blockdev);
402 
403     blockdev->dataplane =
404         xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
405                                    blockdev->props.iothread);
406 
407     blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
408 }
409 
410 static void xen_block_frontend_changed(XenDevice *xendev,
411                                        enum xenbus_state frontend_state,
412                                        Error **errp)
413 {
414     ERRP_GUARD();
415     enum xenbus_state backend_state = xen_device_backend_get_state(xendev);
416 
417     switch (frontend_state) {
418     case XenbusStateInitialised:
419     case XenbusStateConnected:
420         if (backend_state == XenbusStateConnected) {
421             break;
422         }
423 
424         xen_block_disconnect(xendev, errp);
425         if (*errp) {
426             break;
427         }
428 
429         xen_block_connect(xendev, errp);
430         if (*errp) {
431             break;
432         }
433 
434         xen_device_backend_set_state(xendev, XenbusStateConnected);
435         break;
436 
437     case XenbusStateClosing:
438         xen_device_backend_set_state(xendev, XenbusStateClosing);
439         break;
440 
441     case XenbusStateClosed:
442     case XenbusStateUnknown:
443         xen_block_disconnect(xendev, errp);
444         if (*errp) {
445             break;
446         }
447 
448         xen_device_backend_set_state(xendev, XenbusStateClosed);
449         break;
450 
451     default:
452         break;
453     }
454 }
455 
/*
 * Convert a zero-based disk index into its letter suffix:
 * 0 -> "a", 25 -> "z", 26 -> "aa", and so on.
 *
 * Returns a newly allocated string which the caller must g_free().
 */
static char *disk_to_vbd_name(unsigned int disk)
{
    /* Comfortably larger than the longest suffix a 32-bit index yields. */
    char letters[16];
    size_t pos = sizeof(letters) - 1;

    letters[pos] = '\0';

    /* Emit letters right to left, least significant first. */
    for (;;) {
        letters[--pos] = 'a' + disk % 26;
        if (disk < 26) {
            break;
        }
        disk = (disk / 26) - 1;
    }

    return g_strdup(&letters[pos]);
}
466 
467 static void xen_block_get_vdev(Object *obj, Visitor *v, const char *name,
468                                void *opaque, Error **errp)
469 {
470     Property *prop = opaque;
471     XenBlockVdev *vdev = object_field_prop_ptr(obj, prop);
472     char *str;
473 
474     switch (vdev->type) {
475     case XEN_BLOCK_VDEV_TYPE_DP:
476         str = g_strdup_printf("d%lup%lu", vdev->disk, vdev->partition);
477         break;
478 
479     case XEN_BLOCK_VDEV_TYPE_XVD:
480     case XEN_BLOCK_VDEV_TYPE_HD:
481     case XEN_BLOCK_VDEV_TYPE_SD: {
482         char *vbd_name = disk_to_vbd_name(vdev->disk);
483 
484         str = g_strdup_printf("%s%s%lu",
485                               (vdev->type == XEN_BLOCK_VDEV_TYPE_XVD) ?
486                               "xvd" :
487                               (vdev->type == XEN_BLOCK_VDEV_TYPE_HD) ?
488                               "hd" :
489                               "sd",
490                               vbd_name, vdev->partition);
491         g_free(vbd_name);
492         break;
493     }
494     default:
495         error_setg(errp, "invalid vdev type");
496         return;
497     }
498 
499     visit_type_str(v, name, &str, errp);
500     g_free(str);
501 }
502 
/*
 * Parse the lower-case letter suffix of a disk name ("a", "z", "aa",
 * ...) into a zero-based disk index; the inverse of disk_to_vbd_name().
 *
 * @name: string positioned at the first letter.
 * @endp: set to the first character that was not consumed.
 * @disk: receives the disk index on success; untouched on failure.
 *
 * Returns 0 on success. Returns -1 if @name does not start with a
 * lower-case letter, or if the letters encode a value too large for the
 * accumulator. Without the range check an over-long name would wrap
 * around silently and could alias a small, valid disk index.
 */
static int vbd_name_to_disk(const char *name, const char **endp,
                            unsigned long *disk)
{
    const unsigned int n_max = ~0U;
    unsigned int n = 0;

    /* Only ASCII 'a'..'z' are valid disk letters. */
    while (*name >= 'a' && *name <= 'z') {
        unsigned int digit = *name - 'a' + 1;

        /* Would n * 26 + digit exceed n_max? */
        if (n > (n_max - digit) / 26) {
            *endp = name;
            return -1;
        }

        n = n * 26 + digit;
        name++;
    }
    *endp = name;

    if (!n) {
        return -1;
    }

    *disk = n - 1;

    return 0;
}
526 
/*
 * Property setter for "vdev": parse a virtual disk specifier of the
 * form d<disk>[p<partition>], xvd<letters>[<partition>],
 * hd<letters>[<partition>] or sd<letters>[<partition>] and fill in the
 * type, disk, partition and encoded number of the XenBlockVdev.
 *
 * On any parse or range error @errp is set to "invalid virtual disk
 * specifier" and the vdev type is reset to XEN_BLOCK_VDEV_TYPE_INVALID.
 */
static void xen_block_set_vdev(Object *obj, Visitor *v, const char *name,
                               void *opaque, Error **errp)
{
    Property *prop = opaque;
    XenBlockVdev *vdev = object_field_prop_ptr(obj, prop);
    char *str, *p;
    const char *end;

    if (!visit_type_str(v, name, &str, errp)) {
        return;
    }

    /* Every accepted form contains a 'd'; what precedes it is the scheme. */
    p = strchr(str, 'd');
    if (!p) {
        goto invalid;
    }

    /* Split in place: str becomes the scheme prefix, p the remainder. */
    *p++ = '\0';
    if (*str == '\0') {
        vdev->type = XEN_BLOCK_VDEV_TYPE_DP;
    } else if (strcmp(str, "xv") == 0) {
        vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
    } else if (strcmp(str, "h") == 0) {
        vdev->type = XEN_BLOCK_VDEV_TYPE_HD;
    } else if (strcmp(str, "s") == 0) {
        vdev->type = XEN_BLOCK_VDEV_TYPE_SD;
    } else {
        goto invalid;
    }

    if (vdev->type == XEN_BLOCK_VDEV_TYPE_DP) {
        /* d<disk>p<partition>: the disk is a decimal number. */
        if (qemu_strtoul(p, &end, 10, &vdev->disk)) {
            goto invalid;
        }

        /* A 'p' separator must be followed by something. */
        if (*end == 'p') {
            if (*(++end) == '\0') {
                goto invalid;
            }
        }
    } else {
        /* Letter-based schemes: the disk is spelled a, b, ..., aa, ... */
        if (vbd_name_to_disk(p, &end, &vdev->disk)) {
            goto invalid;
        }
    }

    /* Whatever remains must be the decimal partition number, in full. */
    if (*end != '\0') {
        p = (char *)end;

        if (qemu_strtoul(p, &end, 10, &vdev->partition)) {
            goto invalid;
        }

        if (*end != '\0') {
            goto invalid;
        }
    } else {
        vdev->partition = 0;
    }

    /* Range-check disk/partition and pack them into the device number. */
    switch (vdev->type) {
    case XEN_BLOCK_VDEV_TYPE_DP:
    case XEN_BLOCK_VDEV_TYPE_XVD:
        /* Use the compact encoding when both fields fit in 4 bits. */
        if (vdev->disk < (1 << 4) && vdev->partition < (1 << 4)) {
            vdev->number = (XVDA_MAJOR << 8) | (vdev->disk << 4) |
                vdev->partition;
        } else if (vdev->disk < (1 << 20) && vdev->partition < (1 << 8)) {
            vdev->number = (XVDQ_MAJOR << 8) | (vdev->disk << 8) |
                vdev->partition;
        } else {
            goto invalid;
        }
        break;

    case XEN_BLOCK_VDEV_TYPE_HD:
        /* Disks 0-1 use HDA_MAJOR, disks 2-3 use HDC_MAJOR. */
        if ((vdev->disk == 0 || vdev->disk == 1) &&
            vdev->partition < (1 << 6)) {
            vdev->number = (HDA_MAJOR << 8) | (vdev->disk << 6) |
                vdev->partition;
        } else if ((vdev->disk == 2 || vdev->disk == 3) &&
                   vdev->partition < (1 << 6)) {
            vdev->number = (HDC_MAJOR << 8) | ((vdev->disk - 2) << 6) |
                vdev->partition;
        } else {
            goto invalid;
        }
        break;

    case XEN_BLOCK_VDEV_TYPE_SD:
        if (vdev->disk < (1 << 4) && vdev->partition < (1 << 4)) {
            vdev->number = (SDA_MAJOR << 8) | (vdev->disk << 4) |
                vdev->partition;
        } else {
            goto invalid;
        }
        break;

    default:
        goto invalid;
    }

    g_free(str);
    return;

invalid:
    error_setg(errp, "invalid virtual disk specifier");

    /* Leave the property in a state the realize path will reject. */
    vdev->type = XEN_BLOCK_VDEV_TYPE_INVALID;
    g_free(str);
}
637 
/*
 * This property deals with 'vdev' names adhering to the Xen VBD naming
 * scheme described in:
 *
 * https://xenbits.xen.org/docs/unstable/man/xen-vbd-interface.7.html
 *
 * Values are exchanged as strings; xen_block_get_vdev() and
 * xen_block_set_vdev() convert to and from XenBlockVdev.
 */
const PropertyInfo xen_block_prop_vdev = {
    .name  = "str",
    .description = "Virtual Disk specifier: d*p*/xvd*/hd*/sd*",
    .get = xen_block_get_vdev,
    .set = xen_block_set_vdev,
};
650 
/*
 * Properties common to all Xen block devices: the "vdev" specifier, the
 * generic block configuration, "max-ring-page-order" (declared with 4 as
 * its default value) and an "iothread" link.
 */
static Property xen_block_props[] = {
    DEFINE_PROP("vdev", XenBlockDevice, props.vdev,
                xen_block_prop_vdev, XenBlockVdev),
    DEFINE_BLOCK_PROPERTIES(XenBlockDevice, props.conf),
    DEFINE_PROP_UINT32("max-ring-page-order", XenBlockDevice,
                       props.max_ring_page_order, 4),
    DEFINE_PROP_LINK("iothread", XenBlockDevice, props.iothread,
                     TYPE_IOTHREAD, IOThread *),
    DEFINE_PROP_END_OF_LIST()
};
661 
662 static void xen_block_class_init(ObjectClass *class, void *data)
663 {
664     DeviceClass *dev_class = DEVICE_CLASS(class);
665     XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
666 
667     xendev_class->backend = "qdisk";
668     xendev_class->device = "vbd";
669     xendev_class->get_name = xen_block_get_name;
670     xendev_class->realize = xen_block_realize;
671     xendev_class->frontend_changed = xen_block_frontend_changed;
672     xendev_class->unrealize = xen_block_unrealize;
673 
674     device_class_set_props(dev_class, xen_block_props);
675 }
676 
/*
 * QOM type description of the Xen block device base type. It is marked
 * abstract; xen_disk_type_info and xen_cdrom_type_info name it as their
 * parent.
 */
static const TypeInfo xen_block_type_info = {
    .name = TYPE_XEN_BLOCK_DEVICE,
    .parent = TYPE_XEN_DEVICE,
    .instance_size = sizeof(XenBlockDevice),
    .abstract = true,
    .class_size = sizeof(XenBlockDeviceClass),
    .class_init = xen_block_class_init,
};
685 
/*
 * Disk-specific unrealize hook, called from xen_block_unrealize().
 * It only emits a trace event.
 */
static void xen_disk_unrealize(XenBlockDevice *blockdev)
{
    trace_xen_disk_unrealize();
}
690 
691 static void xen_disk_realize(XenBlockDevice *blockdev, Error **errp)
692 {
693     BlockConf *conf = &blockdev->props.conf;
694 
695     trace_xen_disk_realize();
696 
697     blockdev->device_type = "disk";
698 
699     if (!conf->blk) {
700         error_setg(errp, "drive property not set");
701         return;
702     }
703 
704     blockdev->info = blk_supports_write_perm(conf->blk) ? 0 : VDISK_READONLY;
705 }
706 
707 static void xen_disk_class_init(ObjectClass *class, void *data)
708 {
709     DeviceClass *dev_class = DEVICE_CLASS(class);
710     XenBlockDeviceClass *blockdev_class = XEN_BLOCK_DEVICE_CLASS(class);
711 
712     blockdev_class->realize = xen_disk_realize;
713     blockdev_class->unrealize = xen_disk_unrealize;
714 
715     dev_class->desc = "Xen Disk Device";
716 }
717 
/* QOM type description of the Xen disk device. */
static const TypeInfo xen_disk_type_info = {
    .name = TYPE_XEN_DISK_DEVICE,
    .parent = TYPE_XEN_BLOCK_DEVICE,
    .instance_size = sizeof(XenDiskDevice),
    .class_init = xen_disk_class_init,
};
724 
/*
 * CD-ROM-specific unrealize hook, called from xen_block_unrealize().
 * It only emits a trace event.
 */
static void xen_cdrom_unrealize(XenBlockDevice *blockdev)
{
    trace_xen_cdrom_unrealize();
}
729 
730 static void xen_cdrom_realize(XenBlockDevice *blockdev, Error **errp)
731 {
732     BlockConf *conf = &blockdev->props.conf;
733 
734     trace_xen_cdrom_realize();
735 
736     blockdev->device_type = "cdrom";
737 
738     if (!conf->blk) {
739         int rc;
740 
741         /* Set up an empty drive */
742         conf->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
743 
744         rc = blk_attach_dev(conf->blk, DEVICE(blockdev));
745         if (!rc) {
746             error_setg_errno(errp, -rc, "failed to create drive");
747             return;
748         }
749     }
750 
751     blockdev->info = VDISK_READONLY | VDISK_CDROM;
752 }
753 
754 static void xen_cdrom_class_init(ObjectClass *class, void *data)
755 {
756     DeviceClass *dev_class = DEVICE_CLASS(class);
757     XenBlockDeviceClass *blockdev_class = XEN_BLOCK_DEVICE_CLASS(class);
758 
759     blockdev_class->realize = xen_cdrom_realize;
760     blockdev_class->unrealize = xen_cdrom_unrealize;
761 
762     dev_class->desc = "Xen CD-ROM Device";
763 }
764 
/* QOM type description of the Xen CD-ROM device. */
static const TypeInfo xen_cdrom_type_info = {
    .name = TYPE_XEN_CDROM_DEVICE,
    .parent = TYPE_XEN_BLOCK_DEVICE,
    .instance_size = sizeof(XenCDRomDevice),
    .class_init = xen_cdrom_class_init,
};
771 
772 static void xen_block_register_types(void)
773 {
774     type_register_static(&xen_block_type_info);
775     type_register_static(&xen_disk_type_info);
776     type_register_static(&xen_cdrom_type_info);
777 }
778 
779 type_init(xen_block_register_types)
780 
/*
 * Delete the block node called @node_name through qmp_blockdev_del(),
 * after emitting a trace event. Errors are reported through @errp.
 */
static void xen_block_blockdev_del(const char *node_name, Error **errp)
{
    trace_xen_block_blockdev_del(node_name);

    qmp_blockdev_del(node_name, errp);
}
787 
788 static char *xen_block_blockdev_add(const char *id, QDict *qdict,
789                                     Error **errp)
790 {
791     ERRP_GUARD();
792     const char *driver = qdict_get_try_str(qdict, "driver");
793     BlockdevOptions *options = NULL;
794     char *node_name;
795     Visitor *v;
796 
797     if (!driver) {
798         error_setg(errp, "no 'driver' parameter");
799         return NULL;
800     }
801 
802     node_name = g_strdup_printf("%s-%s", id, driver);
803     qdict_put_str(qdict, "node-name", node_name);
804 
805     trace_xen_block_blockdev_add(node_name);
806 
807     v = qobject_input_visitor_new(QOBJECT(qdict));
808     visit_type_BlockdevOptions(v, NULL, &options, errp);
809     visit_free(v);
810     if (!options) {
811         goto fail;
812     }
813 
814     qmp_blockdev_add(options, errp);
815 
816     if (*errp) {
817         goto fail;
818     }
819 
820     qapi_free_BlockdevOptions(options);
821 
822     return node_name;
823 
824 fail:
825     if (options) {
826         qapi_free_BlockdevOptions(options);
827     }
828     g_free(node_name);
829 
830     return NULL;
831 }
832 
833 static void xen_block_drive_destroy(XenBlockDrive *drive, Error **errp)
834 {
835     ERRP_GUARD();
836     char *node_name = drive->node_name;
837 
838     if (node_name) {
839         xen_block_blockdev_del(node_name, errp);
840         if (*errp) {
841             return;
842         }
843         g_free(node_name);
844         drive->node_name = NULL;
845     }
846     g_free(drive->id);
847     g_free(drive);
848 }
849 
850 static XenBlockDrive *xen_block_drive_create(const char *id,
851                                              const char *device_type,
852                                              QDict *opts, Error **errp)
853 {
854     ERRP_GUARD();
855     const char *params = qdict_get_try_str(opts, "params");
856     const char *mode = qdict_get_try_str(opts, "mode");
857     const char *direct_io_safe = qdict_get_try_str(opts, "direct-io-safe");
858     const char *discard_enable = qdict_get_try_str(opts, "discard-enable");
859     char *driver = NULL;
860     char *filename = NULL;
861     XenBlockDrive *drive = NULL;
862     QDict *file_layer;
863     QDict *driver_layer;
864     struct stat st;
865     int rc;
866 
867     if (params) {
868         char **v = g_strsplit(params, ":", 2);
869 
870         if (v[1] == NULL) {
871             filename = g_strdup(v[0]);
872             driver = g_strdup("raw");
873         } else {
874             if (strcmp(v[0], "aio") == 0) {
875                 driver = g_strdup("raw");
876             } else if (strcmp(v[0], "vhd") == 0) {
877                 driver = g_strdup("vpc");
878             } else {
879                 driver = g_strdup(v[0]);
880             }
881             filename = g_strdup(v[1]);
882         }
883 
884         g_strfreev(v);
885     } else {
886         error_setg(errp, "no params");
887         goto done;
888     }
889 
890     assert(filename);
891     assert(driver);
892 
893     drive = g_new0(XenBlockDrive, 1);
894     drive->id = g_strdup(id);
895 
896     rc = stat(filename, &st);
897     if (rc) {
898         error_setg_errno(errp, errno, "Could not stat file '%s'", filename);
899         goto done;
900     }
901 
902     file_layer = qdict_new();
903     driver_layer = qdict_new();
904 
905     if (S_ISBLK(st.st_mode)) {
906         qdict_put_str(file_layer, "driver", "host_device");
907     } else {
908         qdict_put_str(file_layer, "driver", "file");
909     }
910 
911     qdict_put_str(file_layer, "filename", filename);
912 
913     if (mode && *mode != 'w') {
914         qdict_put_bool(file_layer, "read-only", true);
915     }
916 
917     if (direct_io_safe) {
918         unsigned long value;
919 
920         if (!qemu_strtoul(direct_io_safe, NULL, 2, &value) && !!value) {
921             QDict *cache_qdict = qdict_new();
922 
923             qdict_put_bool(cache_qdict, "direct", true);
924             qdict_put(file_layer, "cache", cache_qdict);
925 
926             qdict_put_str(file_layer, "aio", "native");
927         }
928     }
929 
930     if (discard_enable) {
931         unsigned long value;
932 
933         if (!qemu_strtoul(discard_enable, NULL, 2, &value) && !!value) {
934             qdict_put_str(file_layer, "discard", "unmap");
935             qdict_put_str(driver_layer, "discard", "unmap");
936         }
937     }
938 
939     /*
940      * It is necessary to turn file locking off as an emulated device
941      * may have already opened the same image file.
942      */
943     qdict_put_str(file_layer, "locking", "off");
944 
945     qdict_put_str(driver_layer, "driver", driver);
946 
947     qdict_put(driver_layer, "file", file_layer);
948 
949     g_assert(!drive->node_name);
950     drive->node_name = xen_block_blockdev_add(drive->id, driver_layer,
951                                               errp);
952 
953     qobject_unref(driver_layer);
954 
955 done:
956     g_free(filename);
957     g_free(driver);
958     if (*errp) {
959         xen_block_drive_destroy(drive, NULL);
960         return NULL;
961     }
962 
963     return drive;
964 }
965 
966 static const char *xen_block_drive_get_node_name(XenBlockDrive *drive)
967 {
968     return drive->node_name ? drive->node_name : "";
969 }
970 
971 static void xen_block_iothread_destroy(XenBlockIOThread *iothread,
972                                        Error **errp)
973 {
974     qmp_object_del(iothread->id, errp);
975 
976     g_free(iothread->id);
977     g_free(iothread);
978 }
979 
980 static XenBlockIOThread *xen_block_iothread_create(const char *id,
981                                                    Error **errp)
982 {
983     ERRP_GUARD();
984     XenBlockIOThread *iothread = g_new(XenBlockIOThread, 1);
985     ObjectOptions *opts;
986 
987     iothread->id = g_strdup(id);
988 
989     opts = g_new(ObjectOptions, 1);
990     *opts = (ObjectOptions) {
991         .qom_type = OBJECT_TYPE_IOTHREAD,
992         .id = g_strdup(id),
993     };
994     qmp_object_add(opts, errp);
995     qapi_free_ObjectOptions(opts);
996 
997     if (*errp) {
998         g_free(iothread->id);
999         g_free(iothread);
1000         return NULL;
1001     }
1002 
1003     return iothread;
1004 }
1005 
1006 static void xen_block_device_create(XenBackendInstance *backend,
1007                                     QDict *opts, Error **errp)
1008 {
1009     ERRP_GUARD();
1010     XenBus *xenbus = xen_backend_get_bus(backend);
1011     const char *name = xen_backend_get_name(backend);
1012     unsigned long number;
1013     const char *vdev, *device_type;
1014     XenBlockDrive *drive = NULL;
1015     XenBlockIOThread *iothread = NULL;
1016     XenDevice *xendev = NULL;
1017     const char *type;
1018     XenBlockDevice *blockdev;
1019 
1020     if (qemu_strtoul(name, NULL, 10, &number)) {
1021         error_setg(errp, "failed to parse name '%s'", name);
1022         goto fail;
1023     }
1024 
1025     trace_xen_block_device_create(number);
1026 
1027     vdev = qdict_get_try_str(opts, "dev");
1028     if (!vdev) {
1029         error_setg(errp, "no dev parameter");
1030         goto fail;
1031     }
1032 
1033     device_type = qdict_get_try_str(opts, "device-type");
1034     if (!device_type) {
1035         error_setg(errp, "no device-type parameter");
1036         goto fail;
1037     }
1038 
1039     if (!strcmp(device_type, "disk")) {
1040         type = TYPE_XEN_DISK_DEVICE;
1041     } else if (!strcmp(device_type, "cdrom")) {
1042         type = TYPE_XEN_CDROM_DEVICE;
1043     } else {
1044         error_setg(errp, "invalid device-type parameter '%s'", device_type);
1045         goto fail;
1046     }
1047 
1048     drive = xen_block_drive_create(vdev, device_type, opts, errp);
1049     if (!drive) {
1050         error_prepend(errp, "failed to create drive: ");
1051         goto fail;
1052     }
1053 
1054     iothread = xen_block_iothread_create(vdev, errp);
1055     if (*errp) {
1056         error_prepend(errp, "failed to create iothread: ");
1057         goto fail;
1058     }
1059 
1060     xendev = XEN_DEVICE(qdev_new(type));
1061     blockdev = XEN_BLOCK_DEVICE(xendev);
1062 
1063     if (!object_property_set_str(OBJECT(xendev), "vdev", vdev,
1064                                  errp)) {
1065         error_prepend(errp, "failed to set 'vdev': ");
1066         goto fail;
1067     }
1068 
1069     if (!object_property_set_str(OBJECT(xendev), "drive",
1070                                  xen_block_drive_get_node_name(drive),
1071                                  errp)) {
1072         error_prepend(errp, "failed to set 'drive': ");
1073         goto fail;
1074     }
1075 
1076     if (!object_property_set_str(OBJECT(xendev), "iothread", iothread->id,
1077                                  errp)) {
1078         error_prepend(errp, "failed to set 'iothread': ");
1079         goto fail;
1080     }
1081 
1082     blockdev->iothread = iothread;
1083     blockdev->drive = drive;
1084 
1085     if (!qdev_realize_and_unref(DEVICE(xendev), BUS(xenbus), errp)) {
1086         error_prepend(errp, "realization of device %s failed: ", type);
1087         goto fail;
1088     }
1089 
1090     xen_backend_set_device(backend, xendev);
1091     return;
1092 
1093 fail:
1094     if (xendev) {
1095         object_unparent(OBJECT(xendev));
1096     }
1097 
1098     if (iothread) {
1099         xen_block_iothread_destroy(iothread, NULL);
1100     }
1101 
1102     if (drive) {
1103         xen_block_drive_destroy(drive, NULL);
1104     }
1105 }
1106 
1107 static void xen_block_device_destroy(XenBackendInstance *backend,
1108                                      Error **errp)
1109 {
1110     ERRP_GUARD();
1111     XenDevice *xendev = xen_backend_get_device(backend);
1112     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
1113     XenBlockVdev *vdev = &blockdev->props.vdev;
1114     XenBlockDrive *drive = blockdev->drive;
1115     XenBlockIOThread *iothread = blockdev->iothread;
1116 
1117     trace_xen_block_device_destroy(vdev->number);
1118 
1119     object_unparent(OBJECT(xendev));
1120 
1121     /*
1122      * Drain all pending RCU callbacks as object_unparent() frees `xendev'
1123      * in a RCU callback.
1124      * And due to the property "drive" still existing in `xendev', we
1125      * can't destroy the XenBlockDrive associated with `xendev' with
1126      * xen_block_drive_destroy() below.
1127      */
1128     drain_call_rcu();
1129 
1130     if (iothread) {
1131         xen_block_iothread_destroy(iothread, errp);
1132         if (*errp) {
1133             error_prepend(errp, "failed to destroy iothread: ");
1134             return;
1135         }
1136     }
1137 
1138     if (drive) {
1139         xen_block_drive_destroy(drive, errp);
1140         if (*errp) {
1141             error_prepend(errp, "failed to destroy drive: ");
1142             return;
1143         }
1144     }
1145 }
1146 
1147 static const XenBackendInfo xen_block_backend_info = {
1148     .type = "qdisk",
1149     .create = xen_block_device_create,
1150     .destroy = xen_block_device_destroy,
1151 };
1152 
1153 static void xen_block_register_backend(void)
1154 {
1155     xen_backend_register(&xen_block_backend_info);
1156 }
1157 
1158 xen_backend_init(xen_block_register_backend);
1159