xref: /qemu/block.c (revision 75fb3d28)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/qjson.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/notify.h"
37 #include "qemu/coroutine.h"
38 #include "block/qapi.h"
39 #include "qmp-commands.h"
40 #include "qemu/timer.h"
41 #include "qapi-event.h"
42 #include "block/throttle-groups.h"
43 
44 #ifdef CONFIG_BSD
45 #include <sys/ioctl.h>
46 #include <sys/queue.h>
47 #ifndef __DragonFly__
48 #include <sys/disk.h>
49 #endif
50 #endif
51 
52 #ifdef _WIN32
53 #include <windows.h>
54 #endif
55 
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* QLIST linkage (presumably into a BDS's dirty_bitmaps list - confirm) */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
72 
/*
 * Sentinel stored in a result field (e.g. CreateCo.ret) until the coroutine
 * doing the work has stored the real return value.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* BDSes created by bdrv_new_root(); linked through their device_list field */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* BDSes that were given a node name by bdrv_assign_node_name(); linked
 * through their node_list field */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BDS allocated by bdrv_new(); linked through its bs_list field */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Block drivers added by bdrv_register() */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations of helpers defined later in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

static void bdrv_close(BlockDriverState *bs);
98 
99 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * ':' (e.g. "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    int is_lower = first >= 'a' && first <= 'z';
    int is_upper = first >= 'A' && first <= 'Z';

    /* Do not look at the second character unless the first is a letter */
    if (!is_lower && !is_upper) {
        return 0;
    }

    return filename[1] == ':';
}
106 
107 int is_windows_drive(const char *filename)
108 {
109     if (is_windows_drive_prefix(filename) &&
110         filename[2] == '\0')
111         return 1;
112     if (strstart(filename, "\\\\.\\", NULL) ||
113         strstart(filename, "//./", NULL))
114         return 1;
115     return 0;
116 }
117 #endif
118 
119 size_t bdrv_opt_mem_align(BlockDriverState *bs)
120 {
121     if (!bs || !bs->drv) {
122         /* page size or 4k (hdd sector size) should be on the safe side */
123         return MAX(4096, getpagesize());
124     }
125 
126     return bs->bl.opt_mem_alignment;
127 }
128 
129 size_t bdrv_min_mem_align(BlockDriverState *bs)
130 {
131     if (!bs || !bs->drv) {
132         /* page size or 4k (hdd sector size) should be on the safe side */
133         return MAX(4096, getpagesize());
134     }
135 
136     return bs->bl.min_mem_alignment;
137 }
138 
/*
 * Return non-zero if @path starts with "<protocol>:", i.e. a ':' occurs
 * before any path separator.  On Windows, drive specifications are never
 * treated as protocols and '\' also counts as a separator.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stoppers[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stoppers[] = ":/";
#endif

    /* Look at the first stopper character (or the terminating NUL) */
    return path[strcspn(path, stoppers)] == ':';
}
156 
/*
 * Return non-zero if @path is absolute: it starts with '/', or, on Windows,
 * it starts with '\' or is recognized by is_windows_drive() /
 * is_windows_drive_prefix().
 */
int path_is_absolute(const char *path)
{
    const char lead = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (lead == '\\') {
        return 1;
    }
#endif

    return lead == '/';
}
169 
/*
 * Build in @dest (capacity @dest_size bytes) the path of @filename as seen
 * from @base_path.
 *
 * An absolute @filename is copied unchanged.  Otherwise the leading part of
 * @base_path is kept up to and including whichever comes later, its first
 * ':' or its last '/' (on Windows also its last '\'), and @filename is
 * appended to that prefix.  This is how URL-like base paths are supported.
 *
 * Nothing is written when @dest_size <= 0.  The prefix is truncated to fit
 * @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (start of string if there is none) */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* Position just past the last separator (start of string if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep the longer of the two prefixes, clamped to the buffer size */
    cut = after_sep > after_proto ? after_sep : after_proto;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
213 
214 void bdrv_get_full_backing_filename_from_filename(const char *backed,
215                                                   const char *backing,
216                                                   char *dest, size_t sz,
217                                                   Error **errp)
218 {
219     if (backing[0] == '\0' || path_has_protocol(backing) ||
220         path_is_absolute(backing))
221     {
222         pstrcpy(dest, sz, backing);
223     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
224         error_setg(errp, "Cannot use relative backing file names for '%s'",
225                    backed);
226     } else {
227         path_combine(dest, sz, backed, backing);
228     }
229 }
230 
231 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
232                                     Error **errp)
233 {
234     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
235 
236     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
237                                                  dest, sz, errp);
238 }
239 
/*
 * Register the block driver @bdrv: run bdrv_setup_io_funcs() on it, then
 * insert it at the head of the bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
246 
/*
 * Allocate a new BlockDriverState with bdrv_new() and additionally append it
 * to the bdrv_states list.  Returns the new BDS.
 */
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}
254 
255 BlockDriverState *bdrv_new(void)
256 {
257     BlockDriverState *bs;
258     int i;
259 
260     bs = g_new0(BlockDriverState, 1);
261     QLIST_INIT(&bs->dirty_bitmaps);
262     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
263         QLIST_INIT(&bs->op_blockers[i]);
264     }
265     notifier_with_return_list_init(&bs->before_write_notifiers);
266     qemu_co_queue_init(&bs->throttled_reqs[0]);
267     qemu_co_queue_init(&bs->throttled_reqs[1]);
268     bs->refcnt = 1;
269     bs->aio_context = qemu_get_aio_context();
270 
271     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
272 
273     return bs;
274 }
275 
276 BlockDriver *bdrv_find_format(const char *format_name)
277 {
278     BlockDriver *drv1;
279     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280         if (!strcmp(drv1->format_name, format_name)) {
281             return drv1;
282         }
283     }
284     return NULL;
285 }
286 
287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
288 {
289     static const char *whitelist_rw[] = {
290         CONFIG_BDRV_RW_WHITELIST
291     };
292     static const char *whitelist_ro[] = {
293         CONFIG_BDRV_RO_WHITELIST
294     };
295     const char **p;
296 
297     if (!whitelist_rw[0] && !whitelist_ro[0]) {
298         return 1;               /* no whitelist, anything goes */
299     }
300 
301     for (p = whitelist_rw; *p; p++) {
302         if (!strcmp(drv->format_name, *p)) {
303             return 1;
304         }
305     }
306     if (read_only) {
307         for (p = whitelist_ro; *p; p++) {
308             if (!strcmp(drv->format_name, *p)) {
309                 return 1;
310             }
311         }
312     }
313     return 0;
314 }
315 
/*
 * Arguments and results exchanged between bdrv_create() and the coroutine
 * entry point bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create callback is invoked */
    char *filename;     /* Name passed to the callback; bdrv_create() owns it */
    QemuOpts *opts;     /* Creation options passed to the callback */
    int ret;            /* NOT_DONE until the callback's result is stored */
    Error *err;         /* Error reported by the callback, if any */
} CreateCo;
323 
324 static void coroutine_fn bdrv_create_co_entry(void *opaque)
325 {
326     Error *local_err = NULL;
327     int ret;
328 
329     CreateCo *cco = opaque;
330     assert(cco->drv);
331 
332     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
333     if (local_err) {
334         error_propagate(&cco->err, local_err);
335     }
336     cco->ret = ret;
337 }
338 
339 int bdrv_create(BlockDriver *drv, const char* filename,
340                 QemuOpts *opts, Error **errp)
341 {
342     int ret;
343 
344     Coroutine *co;
345     CreateCo cco = {
346         .drv = drv,
347         .filename = g_strdup(filename),
348         .opts = opts,
349         .ret = NOT_DONE,
350         .err = NULL,
351     };
352 
353     if (!drv->bdrv_create) {
354         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
355         ret = -ENOTSUP;
356         goto out;
357     }
358 
359     if (qemu_in_coroutine()) {
360         /* Fast-path if already in coroutine context */
361         bdrv_create_co_entry(&cco);
362     } else {
363         co = qemu_coroutine_create(bdrv_create_co_entry);
364         qemu_coroutine_enter(co, &cco);
365         while (cco.ret == NOT_DONE) {
366             aio_poll(qemu_get_aio_context(), true);
367         }
368     }
369 
370     ret = cco.ret;
371     if (ret < 0) {
372         if (cco.err) {
373             error_propagate(errp, cco.err);
374         } else {
375             error_setg_errno(errp, -ret, "Could not create image");
376         }
377     }
378 
379 out:
380     g_free(cco.filename);
381     return ret;
382 }
383 
384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
385 {
386     BlockDriver *drv;
387     Error *local_err = NULL;
388     int ret;
389 
390     drv = bdrv_find_protocol(filename, true, errp);
391     if (drv == NULL) {
392         return -ENOENT;
393     }
394 
395     ret = bdrv_create(drv, filename, opts, &local_err);
396     if (local_err) {
397         error_propagate(errp, local_err);
398     }
399     return ret;
400 }
401 
402 /**
403  * Try to get @bs's logical and physical block size.
404  * On success, store them in @bsz struct and return 0.
405  * On failure return -errno.
406  * @bs must not be empty.
407  */
408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
409 {
410     BlockDriver *drv = bs->drv;
411 
412     if (drv && drv->bdrv_probe_blocksizes) {
413         return drv->bdrv_probe_blocksizes(bs, bsz);
414     }
415 
416     return -ENOTSUP;
417 }
418 
419 /**
420  * Try to get @bs's geometry (cyls, heads, sectors).
421  * On success, store them in @geo struct and return 0.
422  * On failure return -errno.
423  * @bs must not be empty.
424  */
425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
426 {
427     BlockDriver *drv = bs->drv;
428 
429     if (drv && drv->bdrv_probe_geometry) {
430         return drv->bdrv_probe_geometry(bs, geo);
431     }
432 
433     return -ENOTSUP;
434 }
435 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the capacity of
 * that buffer in bytes.  On Windows @size must be at least MAX_PATH.
 * Elsewhere the file is created in $TMPDIR, or in /var/tmp if TMPDIR is not
 * set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if the
 * name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember close()'s error first */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
471 
472 /*
473  * Detect host devices. By convention, /dev/cdrom[N] is always
474  * recognized as a host CDROM.
475  */
476 static BlockDriver *find_hdev_driver(const char *filename)
477 {
478     int score_max = 0, score;
479     BlockDriver *drv = NULL, *d;
480 
481     QLIST_FOREACH(d, &bdrv_drivers, list) {
482         if (d->bdrv_probe_device) {
483             score = d->bdrv_probe_device(filename);
484             if (score > score_max) {
485                 score_max = score;
486                 drv = d;
487             }
488         }
489     }
490 
491     return drv;
492 }
493 
494 BlockDriver *bdrv_find_protocol(const char *filename,
495                                 bool allow_protocol_prefix,
496                                 Error **errp)
497 {
498     BlockDriver *drv1;
499     char protocol[128];
500     int len;
501     const char *p;
502 
503     /* TODO Drivers without bdrv_file_open must be specified explicitly */
504 
505     /*
506      * XXX(hch): we really should not let host device detection
507      * override an explicit protocol specification, but moving this
508      * later breaks access to device names with colons in them.
509      * Thanks to the brain-dead persistent naming schemes on udev-
510      * based Linux systems those actually are quite common.
511      */
512     drv1 = find_hdev_driver(filename);
513     if (drv1) {
514         return drv1;
515     }
516 
517     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
518         return &bdrv_file;
519     }
520 
521     p = strchr(filename, ':');
522     assert(p != NULL);
523     len = p - filename;
524     if (len > sizeof(protocol) - 1)
525         len = sizeof(protocol) - 1;
526     memcpy(protocol, filename, len);
527     protocol[len] = '\0';
528     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529         if (drv1->protocol_name &&
530             !strcmp(drv1->protocol_name, protocol)) {
531             return drv1;
532         }
533     }
534 
535     error_setg(errp, "Unknown protocol '%s'", protocol);
536     return NULL;
537 }
538 
539 /*
540  * Guess image format by probing its contents.
541  * This is not a good idea when your image is raw (CVE-2008-2004), but
542  * we do it anyway for backward compatibility.
543  *
544  * @buf         contains the image's first @buf_size bytes.
545  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546  *              but can be smaller if the image file is smaller)
547  * @filename    is its filename.
548  *
549  * For all block drivers, call the bdrv_probe() method to get its
550  * probing score.
551  * Return the first block driver with the highest probing score.
552  */
553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554                             const char *filename)
555 {
556     int score_max = 0, score;
557     BlockDriver *drv = NULL, *d;
558 
559     QLIST_FOREACH(d, &bdrv_drivers, list) {
560         if (d->bdrv_probe) {
561             score = d->bdrv_probe(buf, buf_size, filename);
562             if (score > score_max) {
563                 score_max = score;
564                 drv = d;
565             }
566         }
567     }
568 
569     return drv;
570 }
571 
572 static int find_image_format(BlockDriverState *bs, const char *filename,
573                              BlockDriver **pdrv, Error **errp)
574 {
575     BlockDriver *drv;
576     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
577     int ret = 0;
578 
579     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
581         *pdrv = &bdrv_raw;
582         return ret;
583     }
584 
585     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
586     if (ret < 0) {
587         error_setg_errno(errp, -ret, "Could not read image for determining its "
588                          "format");
589         *pdrv = NULL;
590         return ret;
591     }
592 
593     drv = bdrv_probe_all(buf, ret, filename);
594     if (!drv) {
595         error_setg(errp, "Could not determine image format: No compatible "
596                    "driver found");
597         ret = -ENOENT;
598     }
599     *pdrv = drv;
600     return ret;
601 }
602 
603 /**
604  * Set the current 'total_sectors' value
605  * Return 0 on success, -errno on error.
606  */
607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
608 {
609     BlockDriver *drv = bs->drv;
610 
611     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
612     if (bdrv_is_sg(bs))
613         return 0;
614 
615     /* query actual device if possible, otherwise just trust the hint */
616     if (drv->bdrv_getlength) {
617         int64_t length = drv->bdrv_getlength(bs);
618         if (length < 0) {
619             return length;
620         }
621         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
622     }
623 
624     bs->total_sectors = hint;
625     return 0;
626 }
627 
628 /**
629  * Combines a QDict of new block driver @options with any missing options taken
630  * from @old_options, so that leaving out an option defaults to its old value.
631  */
632 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
633                               QDict *old_options)
634 {
635     if (bs->drv && bs->drv->bdrv_join_options) {
636         bs->drv->bdrv_join_options(options, old_options);
637     } else {
638         qdict_join(options, old_options, false);
639     }
640 }
641 
642 /**
643  * Set open flags for a given discard mode
644  *
645  * Return 0 on success, -1 if the discard mode was invalid.
646  */
647 int bdrv_parse_discard_flags(const char *mode, int *flags)
648 {
649     *flags &= ~BDRV_O_UNMAP;
650 
651     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
652         /* do nothing */
653     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
654         *flags |= BDRV_O_UNMAP;
655     } else {
656         return -1;
657     }
658 
659     return 0;
660 }
661 
662 /**
663  * Set open flags for a given cache mode
664  *
665  * Return 0 on success, -1 if the cache mode was invalid.
666  */
667 int bdrv_parse_cache_flags(const char *mode, int *flags)
668 {
669     *flags &= ~BDRV_O_CACHE_MASK;
670 
671     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
672         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
673     } else if (!strcmp(mode, "directsync")) {
674         *flags |= BDRV_O_NOCACHE;
675     } else if (!strcmp(mode, "writeback")) {
676         *flags |= BDRV_O_CACHE_WB;
677     } else if (!strcmp(mode, "unsafe")) {
678         *flags |= BDRV_O_CACHE_WB;
679         *flags |= BDRV_O_NO_FLUSH;
680     } else if (!strcmp(mode, "writethrough")) {
681         /* this is the default */
682     } else {
683         return -1;
684     }
685 
686     return 0;
687 }
688 
689 /*
690  * Returns the flags that a temporary snapshot should get, based on the
691  * originally requested flags (the originally requested image will have flags
692  * like a backing file)
693  */
694 static int bdrv_temp_snapshot_flags(int flags)
695 {
696     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
697 }
698 
699 /*
700  * Returns the options and flags that bs->file should get if a protocol driver
701  * is expected, based on the given options and flags for the parent BDS
702  */
703 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
704                                    int parent_flags, QDict *parent_options)
705 {
706     int flags = parent_flags;
707 
708     /* Enable protocol handling, disable format probing for bs->file */
709     flags |= BDRV_O_PROTOCOL;
710 
711     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
712      * the parent. */
713     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
714     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
715 
716     /* Our block drivers take care to send flushes and respect unmap policy,
717      * so we can default to enable both on lower layers regardless of the
718      * corresponding parent options. */
719     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
720     flags |= BDRV_O_UNMAP;
721 
722     /* Clear flags that only apply to the top layer */
723     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
724 
725     *child_flags = flags;
726 }
727 
/* Child role whose option inheritance is bdrv_inherited_options(), i.e. the
 * child is opened with protocol handling enabled */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
731 
732 /*
733  * Returns the options and flags that bs->file should get if the use of formats
734  * (and not only protocols) is permitted for it, based on the given options and
735  * flags for the parent BDS
736  */
737 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
738                                        int parent_flags, QDict *parent_options)
739 {
740     child_file.inherit_options(child_flags, child_options,
741                                parent_flags, parent_options);
742 
743     *child_flags &= ~BDRV_O_PROTOCOL;
744 }
745 
/* Child role whose option inheritance is bdrv_inherited_fmt_options(), i.e.
 * like child_file but without BDRV_O_PROTOCOL */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
749 
750 /*
751  * Returns the options and flags that bs->backing should get, based on the
752  * given options and flags for the parent BDS
753  */
754 static void bdrv_backing_options(int *child_flags, QDict *child_options,
755                                  int parent_flags, QDict *parent_options)
756 {
757     int flags = parent_flags;
758 
759     /* The cache mode is inherited unmodified for backing files */
760     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
761     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
762     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
763 
764     /* backing files always opened read-only */
765     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
766 
767     /* snapshot=on is handled on the top layer */
768     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
769 
770     *child_flags = flags;
771 }
772 
/* Child role whose option inheritance is bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
776 
777 static int bdrv_open_flags(BlockDriverState *bs, int flags)
778 {
779     int open_flags = flags | BDRV_O_CACHE_WB;
780 
781     /*
782      * Clear flags that are internal to the block layer before opening the
783      * image.
784      */
785     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
786 
787     /*
788      * Snapshots should be writable.
789      */
790     if (flags & BDRV_O_TEMPORARY) {
791         open_flags |= BDRV_O_RDWR;
792     }
793 
794     return open_flags;
795 }
796 
797 static void update_flags_from_options(int *flags, QemuOpts *opts)
798 {
799     *flags &= ~BDRV_O_CACHE_MASK;
800 
801     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
802     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
803         *flags |= BDRV_O_CACHE_WB;
804     }
805 
806     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
807     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
808         *flags |= BDRV_O_NO_FLUSH;
809     }
810 
811     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
812     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
813         *flags |= BDRV_O_NOCACHE;
814     }
815 }
816 
817 static void update_options_from_flags(QDict *options, int flags)
818 {
819     if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
820         qdict_put(options, BDRV_OPT_CACHE_WB,
821                   qbool_from_bool(flags & BDRV_O_CACHE_WB));
822     }
823     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
824         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
825                   qbool_from_bool(flags & BDRV_O_NOCACHE));
826     }
827     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
828         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
829                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
830     }
831 }
832 
833 static void bdrv_assign_node_name(BlockDriverState *bs,
834                                   const char *node_name,
835                                   Error **errp)
836 {
837     char *gen_node_name = NULL;
838 
839     if (!node_name) {
840         node_name = gen_node_name = id_generate(ID_BLOCK);
841     } else if (!id_wellformed(node_name)) {
842         /*
843          * Check for empty string or invalid characters, but not if it is
844          * generated (generated names use characters not available to the user)
845          */
846         error_setg(errp, "Invalid node name");
847         return;
848     }
849 
850     /* takes care of avoiding namespaces collisions */
851     if (blk_by_name(node_name)) {
852         error_setg(errp, "node-name=%s is conflicting with a device id",
853                    node_name);
854         goto out;
855     }
856 
857     /* takes care of avoiding duplicates node names */
858     if (bdrv_find_node(node_name)) {
859         error_setg(errp, "Duplicate node name");
860         goto out;
861     }
862 
863     /* copy node name into the bs and insert it into the graph list */
864     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
865     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
866 out:
867     g_free(gen_node_name);
868 }
869 
/*
 * Options common to all block drivers.  bdrv_open_common() absorbs them from
 * the options QDict into a QemuOpts created from this list.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_WB,
            .type = QEMU_OPT_BOOL,
            .help = "Enable writeback mode",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
902 
903 /*
904  * Common part for opening disk images and files
905  *
906  * Removes all processed options from *options.
907  */
908 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
909                             QDict *options, Error **errp)
910 {
911     int ret, open_flags;
912     const char *filename;
913     const char *driver_name = NULL;
914     const char *node_name = NULL;
915     QemuOpts *opts;
916     BlockDriver *drv;
917     Error *local_err = NULL;
918 
919     assert(bs->file == NULL);
920     assert(options != NULL && bs->options != options);
921 
922     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
923     qemu_opts_absorb_qdict(opts, options, &local_err);
924     if (local_err) {
925         error_propagate(errp, local_err);
926         ret = -EINVAL;
927         goto fail_opts;
928     }
929 
930     driver_name = qemu_opt_get(opts, "driver");
931     drv = bdrv_find_format(driver_name);
932     assert(drv != NULL);
933 
934     if (file != NULL) {
935         filename = file->bs->filename;
936     } else {
937         filename = qdict_get_try_str(options, "filename");
938     }
939 
940     if (drv->bdrv_needs_filename && !filename) {
941         error_setg(errp, "The '%s' block driver requires a file name",
942                    drv->format_name);
943         ret = -EINVAL;
944         goto fail_opts;
945     }
946 
947     trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
948                            drv->format_name);
949 
950     node_name = qemu_opt_get(opts, "node-name");
951     bdrv_assign_node_name(bs, node_name, &local_err);
952     if (local_err) {
953         error_propagate(errp, local_err);
954         ret = -EINVAL;
955         goto fail_opts;
956     }
957 
958     bs->request_alignment = 512;
959     bs->zero_beyond_eof = true;
960     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
961 
962     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
963         error_setg(errp,
964                    !bs->read_only && bdrv_is_whitelisted(drv, true)
965                         ? "Driver '%s' can only be used for read-only devices"
966                         : "Driver '%s' is not whitelisted",
967                    drv->format_name);
968         ret = -ENOTSUP;
969         goto fail_opts;
970     }
971 
972     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
973     if (bs->open_flags & BDRV_O_COPY_ON_READ) {
974         if (!bs->read_only) {
975             bdrv_enable_copy_on_read(bs);
976         } else {
977             error_setg(errp, "Can't use copy-on-read on read-only device");
978             ret = -EINVAL;
979             goto fail_opts;
980         }
981     }
982 
983     if (filename != NULL) {
984         pstrcpy(bs->filename, sizeof(bs->filename), filename);
985     } else {
986         bs->filename[0] = '\0';
987     }
988     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
989 
990     bs->drv = drv;
991     bs->opaque = g_malloc0(drv->instance_size);
992 
993     /* Apply cache mode options */
994     update_flags_from_options(&bs->open_flags, opts);
995     bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
996 
997     /* Open the image, either directly or using a protocol */
998     open_flags = bdrv_open_flags(bs, bs->open_flags);
999     if (drv->bdrv_file_open) {
1000         assert(file == NULL);
1001         assert(!drv->bdrv_needs_filename || filename != NULL);
1002         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1003     } else {
1004         if (file == NULL) {
1005             error_setg(errp, "Can't use '%s' as a block driver for the "
1006                        "protocol level", drv->format_name);
1007             ret = -EINVAL;
1008             goto free_and_fail;
1009         }
1010         bs->file = file;
1011         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1012     }
1013 
1014     if (ret < 0) {
1015         if (local_err) {
1016             error_propagate(errp, local_err);
1017         } else if (bs->filename[0]) {
1018             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1019         } else {
1020             error_setg_errno(errp, -ret, "Could not open image");
1021         }
1022         goto free_and_fail;
1023     }
1024 
1025     if (bs->encrypted) {
1026         error_report("Encrypted images are deprecated");
1027         error_printf("Support for them will be removed in a future release.\n"
1028                      "You can use 'qemu-img convert' to convert your image"
1029                      " to an unencrypted one.\n");
1030     }
1031 
1032     ret = refresh_total_sectors(bs, bs->total_sectors);
1033     if (ret < 0) {
1034         error_setg_errno(errp, -ret, "Could not refresh total sector count");
1035         goto free_and_fail;
1036     }
1037 
1038     bdrv_refresh_limits(bs, &local_err);
1039     if (local_err) {
1040         error_propagate(errp, local_err);
1041         ret = -EINVAL;
1042         goto free_and_fail;
1043     }
1044 
1045     assert(bdrv_opt_mem_align(bs) != 0);
1046     assert(bdrv_min_mem_align(bs) != 0);
1047     assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1048 
1049     qemu_opts_del(opts);
1050     return 0;
1051 
1052 free_and_fail:
1053     bs->file = NULL;
1054     g_free(bs->opaque);
1055     bs->opaque = NULL;
1056     bs->drv = NULL;
1057 fail_opts:
1058     qemu_opts_del(opts);
1059     return ret;
1060 }
1061 
1062 static QDict *parse_json_filename(const char *filename, Error **errp)
1063 {
1064     QObject *options_obj;
1065     QDict *options;
1066     int ret;
1067 
1068     ret = strstart(filename, "json:", &filename);
1069     assert(ret);
1070 
1071     options_obj = qobject_from_json(filename);
1072     if (!options_obj) {
1073         error_setg(errp, "Could not parse the JSON options");
1074         return NULL;
1075     }
1076 
1077     if (qobject_type(options_obj) != QTYPE_QDICT) {
1078         qobject_decref(options_obj);
1079         error_setg(errp, "Invalid JSON object given");
1080         return NULL;
1081     }
1082 
1083     options = qobject_to_qdict(options_obj);
1084     qdict_flatten(options);
1085 
1086     return options;
1087 }
1088 
1089 static void parse_json_protocol(QDict *options, const char **pfilename,
1090                                 Error **errp)
1091 {
1092     QDict *json_options;
1093     Error *local_err = NULL;
1094 
1095     /* Parse json: pseudo-protocol */
1096     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1097         return;
1098     }
1099 
1100     json_options = parse_json_filename(*pfilename, &local_err);
1101     if (local_err) {
1102         error_propagate(errp, local_err);
1103         return;
1104     }
1105 
1106     /* Options given in the filename have lower priority than options
1107      * specified directly */
1108     qdict_join(options, json_options, false);
1109     QDECREF(json_options);
1110     *pfilename = NULL;
1111 }
1112 
1113 /*
1114  * Fills in default options for opening images and converts the legacy
1115  * filename/flags pair to option QDict entries.
1116  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1117  * block driver has been specified explicitly.
1118  */
1119 static int bdrv_fill_options(QDict **options, const char *filename,
1120                              int *flags, Error **errp)
1121 {
1122     const char *drvname;
1123     bool protocol = *flags & BDRV_O_PROTOCOL;
1124     bool parse_filename = false;
1125     BlockDriver *drv = NULL;
1126     Error *local_err = NULL;
1127 
1128     drvname = qdict_get_try_str(*options, "driver");
1129     if (drvname) {
1130         drv = bdrv_find_format(drvname);
1131         if (!drv) {
1132             error_setg(errp, "Unknown driver '%s'", drvname);
1133             return -ENOENT;
1134         }
1135         /* If the user has explicitly specified the driver, this choice should
1136          * override the BDRV_O_PROTOCOL flag */
1137         protocol = drv->bdrv_file_open;
1138     }
1139 
1140     if (protocol) {
1141         *flags |= BDRV_O_PROTOCOL;
1142     } else {
1143         *flags &= ~BDRV_O_PROTOCOL;
1144     }
1145 
1146     /* Translate cache options from flags into options */
1147     update_options_from_flags(*options, *flags);
1148 
1149     /* Fetch the file name from the options QDict if necessary */
1150     if (protocol && filename) {
1151         if (!qdict_haskey(*options, "filename")) {
1152             qdict_put(*options, "filename", qstring_from_str(filename));
1153             parse_filename = true;
1154         } else {
1155             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1156                              "the same time");
1157             return -EINVAL;
1158         }
1159     }
1160 
1161     /* Find the right block driver */
1162     filename = qdict_get_try_str(*options, "filename");
1163 
1164     if (!drvname && protocol) {
1165         if (filename) {
1166             drv = bdrv_find_protocol(filename, parse_filename, errp);
1167             if (!drv) {
1168                 return -EINVAL;
1169             }
1170 
1171             drvname = drv->format_name;
1172             qdict_put(*options, "driver", qstring_from_str(drvname));
1173         } else {
1174             error_setg(errp, "Must specify either driver or file");
1175             return -EINVAL;
1176         }
1177     }
1178 
1179     assert(drv || !protocol);
1180 
1181     /* Driver-specific filename parsing */
1182     if (drv && drv->bdrv_parse_filename && parse_filename) {
1183         drv->bdrv_parse_filename(filename, *options, &local_err);
1184         if (local_err) {
1185             error_propagate(errp, local_err);
1186             return -EINVAL;
1187         }
1188 
1189         if (!drv->bdrv_needs_filename) {
1190             qdict_del(*options, "filename");
1191         }
1192     }
1193 
1194     return 0;
1195 }
1196 
1197 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1198                                     BlockDriverState *child_bs,
1199                                     const char *child_name,
1200                                     const BdrvChildRole *child_role)
1201 {
1202     BdrvChild *child = g_new(BdrvChild, 1);
1203     *child = (BdrvChild) {
1204         .bs     = child_bs,
1205         .name   = g_strdup(child_name),
1206         .role   = child_role,
1207     };
1208 
1209     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1210     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1211 
1212     return child;
1213 }
1214 
1215 static void bdrv_detach_child(BdrvChild *child)
1216 {
1217     QLIST_REMOVE(child, next);
1218     QLIST_REMOVE(child, next_parent);
1219     g_free(child->name);
1220     g_free(child);
1221 }
1222 
1223 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1224 {
1225     BlockDriverState *child_bs;
1226 
1227     if (child == NULL) {
1228         return;
1229     }
1230 
1231     if (child->bs->inherits_from == parent) {
1232         child->bs->inherits_from = NULL;
1233     }
1234 
1235     child_bs = child->bs;
1236     bdrv_detach_child(child);
1237     bdrv_unref(child_bs);
1238 }
1239 
1240 /*
1241  * Sets the backing file link of a BDS. A new reference is created; callers
1242  * which don't need their own reference any more must call bdrv_unref().
1243  */
1244 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1245 {
1246     if (backing_hd) {
1247         bdrv_ref(backing_hd);
1248     }
1249 
1250     if (bs->backing) {
1251         assert(bs->backing_blocker);
1252         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1253         bdrv_unref_child(bs, bs->backing);
1254     } else if (backing_hd) {
1255         error_setg(&bs->backing_blocker,
1256                    "node is used as backing hd of '%s'",
1257                    bdrv_get_device_or_node_name(bs));
1258     }
1259 
1260     if (!backing_hd) {
1261         error_free(bs->backing_blocker);
1262         bs->backing_blocker = NULL;
1263         bs->backing = NULL;
1264         goto out;
1265     }
1266     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1267     bs->open_flags &= ~BDRV_O_NO_BACKING;
1268     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1269     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1270             backing_hd->drv ? backing_hd->drv->format_name : "");
1271 
1272     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1273     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1274     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1275                     bs->backing_blocker);
1276 out:
1277     bdrv_refresh_limits(bs, NULL);
1278 }
1279 
1280 /*
1281  * Opens the backing file for a BlockDriverState if not yet open
1282  *
1283  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1284  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1285  * itself, all options starting with "${bdref_key}." are considered part of the
1286  * BlockdevRef.
1287  *
1288  * TODO Can this be unified with bdrv_open_image()?
1289  */
1290 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1291                            const char *bdref_key, Error **errp)
1292 {
1293     char *backing_filename = g_malloc0(PATH_MAX);
1294     char *bdref_key_dot;
1295     const char *reference = NULL;
1296     int ret = 0;
1297     BlockDriverState *backing_hd;
1298     QDict *options;
1299     QDict *tmp_parent_options = NULL;
1300     Error *local_err = NULL;
1301 
1302     if (bs->backing != NULL) {
1303         goto free_exit;
1304     }
1305 
1306     /* NULL means an empty set of options */
1307     if (parent_options == NULL) {
1308         tmp_parent_options = qdict_new();
1309         parent_options = tmp_parent_options;
1310     }
1311 
1312     bs->open_flags &= ~BDRV_O_NO_BACKING;
1313 
1314     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1315     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1316     g_free(bdref_key_dot);
1317 
1318     reference = qdict_get_try_str(parent_options, bdref_key);
1319     if (reference || qdict_haskey(options, "file.filename")) {
1320         backing_filename[0] = '\0';
1321     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1322         QDECREF(options);
1323         goto free_exit;
1324     } else {
1325         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1326                                        &local_err);
1327         if (local_err) {
1328             ret = -EINVAL;
1329             error_propagate(errp, local_err);
1330             QDECREF(options);
1331             goto free_exit;
1332         }
1333     }
1334 
1335     if (!bs->drv || !bs->drv->supports_backing) {
1336         ret = -EINVAL;
1337         error_setg(errp, "Driver doesn't support backing files");
1338         QDECREF(options);
1339         goto free_exit;
1340     }
1341 
1342     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1343         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1344     }
1345 
1346     backing_hd = NULL;
1347     ret = bdrv_open_inherit(&backing_hd,
1348                             *backing_filename ? backing_filename : NULL,
1349                             reference, options, 0, bs, &child_backing,
1350                             errp);
1351     if (ret < 0) {
1352         bs->open_flags |= BDRV_O_NO_BACKING;
1353         error_prepend(errp, "Could not open backing file: ");
1354         goto free_exit;
1355     }
1356 
1357     /* Hook up the backing file link; drop our reference, bs owns the
1358      * backing_hd reference now */
1359     bdrv_set_backing_hd(bs, backing_hd);
1360     bdrv_unref(backing_hd);
1361 
1362     qdict_del(parent_options, bdref_key);
1363 
1364 free_exit:
1365     g_free(backing_filename);
1366     QDECREF(tmp_parent_options);
1367     return ret;
1368 }
1369 
1370 /*
1371  * Opens a disk image whose options are given as BlockdevRef in another block
1372  * device's options.
1373  *
1374  * If allow_none is true, no image will be opened if filename is false and no
1375  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1376  *
1377  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1378  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1379  * itself, all options starting with "${bdref_key}." are considered part of the
1380  * BlockdevRef.
1381  *
1382  * The BlockdevRef will be removed from the options QDict.
1383  */
1384 BdrvChild *bdrv_open_child(const char *filename,
1385                            QDict *options, const char *bdref_key,
1386                            BlockDriverState* parent,
1387                            const BdrvChildRole *child_role,
1388                            bool allow_none, Error **errp)
1389 {
1390     BdrvChild *c = NULL;
1391     BlockDriverState *bs;
1392     QDict *image_options;
1393     int ret;
1394     char *bdref_key_dot;
1395     const char *reference;
1396 
1397     assert(child_role != NULL);
1398 
1399     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1400     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1401     g_free(bdref_key_dot);
1402 
1403     reference = qdict_get_try_str(options, bdref_key);
1404     if (!filename && !reference && !qdict_size(image_options)) {
1405         if (!allow_none) {
1406             error_setg(errp, "A block device must be specified for \"%s\"",
1407                        bdref_key);
1408         }
1409         QDECREF(image_options);
1410         goto done;
1411     }
1412 
1413     bs = NULL;
1414     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1415                             parent, child_role, errp);
1416     if (ret < 0) {
1417         goto done;
1418     }
1419 
1420     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1421 
1422 done:
1423     qdict_del(options, bdref_key);
1424     return c;
1425 }
1426 
1427 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1428 {
1429     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1430     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1431     int64_t total_size;
1432     QemuOpts *opts = NULL;
1433     QDict *snapshot_options;
1434     BlockDriverState *bs_snapshot;
1435     Error *local_err = NULL;
1436     int ret;
1437 
1438     /* if snapshot, we create a temporary backing file and open it
1439        instead of opening 'filename' directly */
1440 
1441     /* Get the required size from the image */
1442     total_size = bdrv_getlength(bs);
1443     if (total_size < 0) {
1444         ret = total_size;
1445         error_setg_errno(errp, -total_size, "Could not get image size");
1446         goto out;
1447     }
1448 
1449     /* Create the temporary image */
1450     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1451     if (ret < 0) {
1452         error_setg_errno(errp, -ret, "Could not get temporary filename");
1453         goto out;
1454     }
1455 
1456     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1457                             &error_abort);
1458     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1459     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1460     qemu_opts_del(opts);
1461     if (ret < 0) {
1462         error_prepend(errp, "Could not create temporary overlay '%s': ",
1463                       tmp_filename);
1464         goto out;
1465     }
1466 
1467     /* Prepare a new options QDict for the temporary file */
1468     snapshot_options = qdict_new();
1469     qdict_put(snapshot_options, "file.driver",
1470               qstring_from_str("file"));
1471     qdict_put(snapshot_options, "file.filename",
1472               qstring_from_str(tmp_filename));
1473     qdict_put(snapshot_options, "driver",
1474               qstring_from_str("qcow2"));
1475 
1476     bs_snapshot = bdrv_new();
1477 
1478     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1479                     flags, &local_err);
1480     if (ret < 0) {
1481         error_propagate(errp, local_err);
1482         goto out;
1483     }
1484 
1485     bdrv_append(bs_snapshot, bs);
1486 
1487 out:
1488     g_free(tmp_filename);
1489     return ret;
1490 }
1491 
1492 /*
1493  * Opens a disk image (raw, qcow2, vmdk, ...)
1494  *
1495  * options is a QDict of options to pass to the block drivers, or NULL for an
1496  * empty set of options. The reference to the QDict belongs to the block layer
1497  * after the call (even on failure), so if the caller intends to reuse the
1498  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1499  *
1500  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1501  * If it is not NULL, the referenced BDS will be reused.
1502  *
1503  * The reference parameter may be used to specify an existing block device which
1504  * should be opened. If specified, neither options nor a filename may be given,
1505  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1506  */
1507 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1508                              const char *reference, QDict *options, int flags,
1509                              BlockDriverState *parent,
1510                              const BdrvChildRole *child_role, Error **errp)
1511 {
1512     int ret;
1513     BdrvChild *file = NULL;
1514     BlockDriverState *bs;
1515     BlockDriver *drv = NULL;
1516     const char *drvname;
1517     const char *backing;
1518     Error *local_err = NULL;
1519     int snapshot_flags = 0;
1520 
1521     assert(pbs);
1522     assert(!child_role || !flags);
1523     assert(!child_role == !parent);
1524 
1525     if (reference) {
1526         bool options_non_empty = options ? qdict_size(options) : false;
1527         QDECREF(options);
1528 
1529         if (*pbs) {
1530             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1531                        "another block device");
1532             return -EINVAL;
1533         }
1534 
1535         if (filename || options_non_empty) {
1536             error_setg(errp, "Cannot reference an existing block device with "
1537                        "additional options or a new filename");
1538             return -EINVAL;
1539         }
1540 
1541         bs = bdrv_lookup_bs(reference, reference, errp);
1542         if (!bs) {
1543             return -ENODEV;
1544         }
1545         bdrv_ref(bs);
1546         *pbs = bs;
1547         return 0;
1548     }
1549 
1550     if (*pbs) {
1551         bs = *pbs;
1552     } else {
1553         bs = bdrv_new();
1554     }
1555 
1556     /* NULL means an empty set of options */
1557     if (options == NULL) {
1558         options = qdict_new();
1559     }
1560 
1561     /* json: syntax counts as explicit options, as if in the QDict */
1562     parse_json_protocol(options, &filename, &local_err);
1563     if (local_err) {
1564         ret = -EINVAL;
1565         goto fail;
1566     }
1567 
1568     bs->explicit_options = qdict_clone_shallow(options);
1569 
1570     if (child_role) {
1571         bs->inherits_from = parent;
1572         child_role->inherit_options(&flags, options,
1573                                     parent->open_flags, parent->options);
1574     }
1575 
1576     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1577     if (local_err) {
1578         goto fail;
1579     }
1580 
1581     bs->open_flags = flags;
1582     bs->options = options;
1583     options = qdict_clone_shallow(options);
1584 
1585     /* Find the right image format driver */
1586     drvname = qdict_get_try_str(options, "driver");
1587     if (drvname) {
1588         drv = bdrv_find_format(drvname);
1589         if (!drv) {
1590             error_setg(errp, "Unknown driver: '%s'", drvname);
1591             ret = -EINVAL;
1592             goto fail;
1593         }
1594     }
1595 
1596     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1597 
1598     backing = qdict_get_try_str(options, "backing");
1599     if (backing && *backing == '\0') {
1600         flags |= BDRV_O_NO_BACKING;
1601         qdict_del(options, "backing");
1602     }
1603 
1604     /* Open image file without format layer */
1605     if ((flags & BDRV_O_PROTOCOL) == 0) {
1606         if (flags & BDRV_O_RDWR) {
1607             flags |= BDRV_O_ALLOW_RDWR;
1608         }
1609         if (flags & BDRV_O_SNAPSHOT) {
1610             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1611             bdrv_backing_options(&flags, options, flags, options);
1612         }
1613 
1614         bs->open_flags = flags;
1615 
1616         file = bdrv_open_child(filename, options, "file", bs,
1617                                &child_file, true, &local_err);
1618         if (local_err) {
1619             ret = -EINVAL;
1620             goto fail;
1621         }
1622     }
1623 
1624     /* Image format probing */
1625     bs->probed = !drv;
1626     if (!drv && file) {
1627         ret = find_image_format(file->bs, filename, &drv, &local_err);
1628         if (ret < 0) {
1629             goto fail;
1630         }
1631         /*
1632          * This option update would logically belong in bdrv_fill_options(),
1633          * but we first need to open bs->file for the probing to work, while
1634          * opening bs->file already requires the (mostly) final set of options
1635          * so that cache mode etc. can be inherited.
1636          *
1637          * Adding the driver later is somewhat ugly, but it's not an option
1638          * that would ever be inherited, so it's correct. We just need to make
1639          * sure to update both bs->options (which has the full effective
1640          * options for bs) and options (which has file.* already removed).
1641          */
1642         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1643         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1644     } else if (!drv) {
1645         error_setg(errp, "Must specify either driver or file");
1646         ret = -EINVAL;
1647         goto fail;
1648     }
1649 
1650     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1651     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1652     /* file must be NULL if a protocol BDS is about to be created
1653      * (the inverse results in an error message from bdrv_open_common()) */
1654     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1655 
1656     /* Open the image */
1657     ret = bdrv_open_common(bs, file, options, &local_err);
1658     if (ret < 0) {
1659         goto fail;
1660     }
1661 
1662     if (file && (bs->file != file)) {
1663         bdrv_unref_child(bs, file);
1664         file = NULL;
1665     }
1666 
1667     /* If there is a backing file, use it */
1668     if ((flags & BDRV_O_NO_BACKING) == 0) {
1669         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1670         if (ret < 0) {
1671             goto close_and_fail;
1672         }
1673     }
1674 
1675     bdrv_refresh_filename(bs);
1676 
1677     /* Check if any unknown options were used */
1678     if (options && (qdict_size(options) != 0)) {
1679         const QDictEntry *entry = qdict_first(options);
1680         if (flags & BDRV_O_PROTOCOL) {
1681             error_setg(errp, "Block protocol '%s' doesn't support the option "
1682                        "'%s'", drv->format_name, entry->key);
1683         } else {
1684             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1685                        "support the option '%s'", drv->format_name,
1686                        bdrv_get_device_name(bs), entry->key);
1687         }
1688 
1689         ret = -EINVAL;
1690         goto close_and_fail;
1691     }
1692 
1693     if (!bdrv_key_required(bs)) {
1694         if (bs->blk) {
1695             blk_dev_change_media_cb(bs->blk, true);
1696         }
1697     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1698                && !runstate_check(RUN_STATE_INMIGRATE)
1699                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1700         error_setg(errp,
1701                    "Guest must be stopped for opening of encrypted image");
1702         ret = -EBUSY;
1703         goto close_and_fail;
1704     }
1705 
1706     QDECREF(options);
1707     *pbs = bs;
1708 
1709     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1710      * temporary snapshot afterwards. */
1711     if (snapshot_flags) {
1712         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1713         if (local_err) {
1714             goto close_and_fail;
1715         }
1716     }
1717 
1718     return 0;
1719 
1720 fail:
1721     if (file != NULL) {
1722         bdrv_unref_child(bs, file);
1723     }
1724     QDECREF(bs->explicit_options);
1725     QDECREF(bs->options);
1726     QDECREF(options);
1727     bs->options = NULL;
1728     if (!*pbs) {
1729         /* If *pbs is NULL, a new BDS has been created in this function and
1730            needs to be freed now. Otherwise, it does not need to be closed,
1731            since it has not really been opened yet. */
1732         bdrv_unref(bs);
1733     }
1734     if (local_err) {
1735         error_propagate(errp, local_err);
1736     }
1737     return ret;
1738 
1739 close_and_fail:
1740     /* See fail path, but now the BDS has to be always closed */
1741     if (*pbs) {
1742         bdrv_close(bs);
1743     } else {
1744         bdrv_unref(bs);
1745     }
1746     QDECREF(options);
1747     if (local_err) {
1748         error_propagate(errp, local_err);
1749     }
1750     return ret;
1751 }
1752 
1753 int bdrv_open(BlockDriverState **pbs, const char *filename,
1754               const char *reference, QDict *options, int flags, Error **errp)
1755 {
1756     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1757                              NULL, errp);
1758 }
1759 
1760 typedef struct BlockReopenQueueEntry {
1761      bool prepared;
1762      BDRVReopenState state;
1763      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1764 } BlockReopenQueueEntry;
1765 
1766 /*
1767  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1768  * reopen of multiple devices.
1769  *
1770  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1771  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1772  * be created and initialized. This newly created BlockReopenQueue should be
1773  * passed back in for subsequent calls that are intended to be of the same
1774  * atomic 'set'.
1775  *
1776  * bs is the BlockDriverState to add to the reopen queue.
1777  *
1778  * options contains the changed options for the associated bs
1779  * (the BlockReopenQueue takes ownership)
1780  *
1781  * flags contains the open flags for the associated bs
1782  *
1783  * returns a pointer to bs_queue, which is either the newly allocated
1784  * bs_queue, or the existing bs_queue being used.
1785  *
1786  */
1787 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1788                                                  BlockDriverState *bs,
1789                                                  QDict *options,
1790                                                  int flags,
1791                                                  const BdrvChildRole *role,
1792                                                  QDict *parent_options,
1793                                                  int parent_flags)
1794 {
1795     assert(bs != NULL);
1796 
1797     BlockReopenQueueEntry *bs_entry;
1798     BdrvChild *child;
1799     QDict *old_options, *explicit_options;
1800 
1801     if (bs_queue == NULL) {
1802         bs_queue = g_new0(BlockReopenQueue, 1);
1803         QSIMPLEQ_INIT(bs_queue);
1804     }
1805 
1806     if (!options) {
1807         options = qdict_new();
1808     }
1809 
1810     /*
1811      * Precedence of options:
1812      * 1. Explicitly passed in options (highest)
1813      * 2. Set in flags (only for top level)
1814      * 3. Retained from explicitly set options of bs
1815      * 4. Inherited from parent node
1816      * 5. Retained from effective options of bs
1817      */
1818 
1819     if (!parent_options) {
1820         /*
1821          * Any setting represented by flags is always updated. If the
1822          * corresponding QDict option is set, it takes precedence. Otherwise
1823          * the flag is translated into a QDict option. The old setting of bs is
1824          * not considered.
1825          */
1826         update_options_from_flags(options, flags);
1827     }
1828 
1829     /* Old explicitly set values (don't overwrite by inherited value) */
1830     old_options = qdict_clone_shallow(bs->explicit_options);
1831     bdrv_join_options(bs, options, old_options);
1832     QDECREF(old_options);
1833 
1834     explicit_options = qdict_clone_shallow(options);
1835 
1836     /* Inherit from parent node */
1837     if (parent_options) {
1838         assert(!flags);
1839         role->inherit_options(&flags, options, parent_flags, parent_options);
1840     }
1841 
1842     /* Old values are used for options that aren't set yet */
1843     old_options = qdict_clone_shallow(bs->options);
1844     bdrv_join_options(bs, options, old_options);
1845     QDECREF(old_options);
1846 
1847     /* bdrv_open() masks this flag out */
1848     flags &= ~BDRV_O_PROTOCOL;
1849 
1850     QLIST_FOREACH(child, &bs->children, next) {
1851         QDict *new_child_options;
1852         char *child_key_dot;
1853 
1854         /* reopen can only change the options of block devices that were
1855          * implicitly created and inherited options. For other (referenced)
1856          * block devices, a syntax like "backing.foo" results in an error. */
1857         if (child->bs->inherits_from != bs) {
1858             continue;
1859         }
1860 
1861         child_key_dot = g_strdup_printf("%s.", child->name);
1862         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1863         g_free(child_key_dot);
1864 
1865         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1866                                 child->role, options, flags);
1867     }
1868 
1869     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1870     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1871 
1872     bs_entry->state.bs = bs;
1873     bs_entry->state.options = options;
1874     bs_entry->state.explicit_options = explicit_options;
1875     bs_entry->state.flags = flags;
1876 
1877     return bs_queue;
1878 }
1879 
1880 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1881                                     BlockDriverState *bs,
1882                                     QDict *options, int flags)
1883 {
1884     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1885                                    NULL, NULL, 0);
1886 }
1887 
1888 /*
1889  * Reopen multiple BlockDriverStates atomically & transactionally.
1890  *
1891  * The queue passed in (bs_queue) must have been built up previous
1892  * via bdrv_reopen_queue().
1893  *
1894  * Reopens all BDS specified in the queue, with the appropriate
1895  * flags.  All devices are prepared for reopen, and failure of any
1896  * device will cause all device changes to be abandonded, and intermediate
1897  * data cleaned up.
1898  *
1899  * If all devices prepare successfully, then the changes are committed
1900  * to all devices.
1901  *
1902  */
1903 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1904 {
1905     int ret = -1;
1906     BlockReopenQueueEntry *bs_entry, *next;
1907     Error *local_err = NULL;
1908 
1909     assert(bs_queue != NULL);
1910 
1911     bdrv_drain_all();
1912 
1913     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1914         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1915             error_propagate(errp, local_err);
1916             goto cleanup;
1917         }
1918         bs_entry->prepared = true;
1919     }
1920 
1921     /* If we reach this point, we have success and just need to apply the
1922      * changes
1923      */
1924     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1925         bdrv_reopen_commit(&bs_entry->state);
1926     }
1927 
1928     ret = 0;
1929 
1930 cleanup:
1931     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1932         if (ret && bs_entry->prepared) {
1933             bdrv_reopen_abort(&bs_entry->state);
1934         } else if (ret) {
1935             QDECREF(bs_entry->state.explicit_options);
1936         }
1937         QDECREF(bs_entry->state.options);
1938         g_free(bs_entry);
1939     }
1940     g_free(bs_queue);
1941     return ret;
1942 }
1943 
1944 
1945 /* Reopen a single BlockDriverState with the specified flags. */
1946 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1947 {
1948     int ret = -1;
1949     Error *local_err = NULL;
1950     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1951 
1952     ret = bdrv_reopen_multiple(queue, &local_err);
1953     if (local_err != NULL) {
1954         error_propagate(errp, local_err);
1955     }
1956     return ret;
1957 }
1958 
1959 
1960 /*
1961  * Prepares a BlockDriverState for reopen. All changes are staged in the
1962  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1963  * the block driver layer .bdrv_reopen_prepare()
1964  *
1965  * bs is the BlockDriverState to reopen
1966  * flags are the new open flags
1967  * queue is the reopen queue
1968  *
1969  * Returns 0 on success, non-zero on error.  On error errp will be set
1970  * as well.
1971  *
1972  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1973  * It is the responsibility of the caller to then call the abort() or
1974  * commit() for any other BDS that have been left in a prepare() state
1975  *
1976  */
1977 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1978                         Error **errp)
1979 {
1980     int ret = -1;
1981     Error *local_err = NULL;
1982     BlockDriver *drv;
1983     QemuOpts *opts;
1984     const char *value;
1985 
1986     assert(reopen_state != NULL);
1987     assert(reopen_state->bs->drv != NULL);
1988     drv = reopen_state->bs->drv;
1989 
1990     /* Process generic block layer options */
1991     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1992     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1993     if (local_err) {
1994         error_propagate(errp, local_err);
1995         ret = -EINVAL;
1996         goto error;
1997     }
1998 
1999     update_flags_from_options(&reopen_state->flags, opts);
2000 
2001     /* If a guest device is attached, it owns WCE */
2002     if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2003         bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2004         bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2005         if (old_wce != new_wce) {
2006             error_setg(errp, "Cannot change cache.writeback: Device attached");
2007             ret = -EINVAL;
2008             goto error;
2009         }
2010     }
2011 
2012     /* node-name and driver must be unchanged. Put them back into the QDict, so
2013      * that they are checked at the end of this function. */
2014     value = qemu_opt_get(opts, "node-name");
2015     if (value) {
2016         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2017     }
2018 
2019     value = qemu_opt_get(opts, "driver");
2020     if (value) {
2021         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2022     }
2023 
2024     /* if we are to stay read-only, do not allow permission change
2025      * to r/w */
2026     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2027         reopen_state->flags & BDRV_O_RDWR) {
2028         error_setg(errp, "Node '%s' is read only",
2029                    bdrv_get_device_or_node_name(reopen_state->bs));
2030         goto error;
2031     }
2032 
2033 
2034     ret = bdrv_flush(reopen_state->bs);
2035     if (ret) {
2036         error_setg_errno(errp, -ret, "Error flushing drive");
2037         goto error;
2038     }
2039 
2040     if (drv->bdrv_reopen_prepare) {
2041         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2042         if (ret) {
2043             if (local_err != NULL) {
2044                 error_propagate(errp, local_err);
2045             } else {
2046                 error_setg(errp, "failed while preparing to reopen image '%s'",
2047                            reopen_state->bs->filename);
2048             }
2049             goto error;
2050         }
2051     } else {
2052         /* It is currently mandatory to have a bdrv_reopen_prepare()
2053          * handler for each supported drv. */
2054         error_setg(errp, "Block format '%s' used by node '%s' "
2055                    "does not support reopening files", drv->format_name,
2056                    bdrv_get_device_or_node_name(reopen_state->bs));
2057         ret = -1;
2058         goto error;
2059     }
2060 
2061     /* Options that are not handled are only okay if they are unchanged
2062      * compared to the old state. It is expected that some options are only
2063      * used for the initial open, but not reopen (e.g. filename) */
2064     if (qdict_size(reopen_state->options)) {
2065         const QDictEntry *entry = qdict_first(reopen_state->options);
2066 
2067         do {
2068             QString *new_obj = qobject_to_qstring(entry->value);
2069             const char *new = qstring_get_str(new_obj);
2070             const char *old = qdict_get_try_str(reopen_state->bs->options,
2071                                                 entry->key);
2072 
2073             if (!old || strcmp(new, old)) {
2074                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2075                 ret = -EINVAL;
2076                 goto error;
2077             }
2078         } while ((entry = qdict_next(reopen_state->options, entry)));
2079     }
2080 
2081     ret = 0;
2082 
2083 error:
2084     qemu_opts_del(opts);
2085     return ret;
2086 }
2087 
2088 /*
2089  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2090  * makes them final by swapping the staging BlockDriverState contents into
2091  * the active BlockDriverState contents.
2092  */
2093 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2094 {
2095     BlockDriver *drv;
2096 
2097     assert(reopen_state != NULL);
2098     drv = reopen_state->bs->drv;
2099     assert(drv != NULL);
2100 
2101     /* If there are any driver level actions to take */
2102     if (drv->bdrv_reopen_commit) {
2103         drv->bdrv_reopen_commit(reopen_state);
2104     }
2105 
2106     /* set BDS specific flags now */
2107     QDECREF(reopen_state->bs->explicit_options);
2108 
2109     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2110     reopen_state->bs->open_flags         = reopen_state->flags;
2111     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2112                                               BDRV_O_CACHE_WB);
2113     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2114 
2115     bdrv_refresh_limits(reopen_state->bs, NULL);
2116 }
2117 
2118 /*
2119  * Abort the reopen, and delete and free the staged changes in
2120  * reopen_state
2121  */
2122 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2123 {
2124     BlockDriver *drv;
2125 
2126     assert(reopen_state != NULL);
2127     drv = reopen_state->bs->drv;
2128     assert(drv != NULL);
2129 
2130     if (drv->bdrv_reopen_abort) {
2131         drv->bdrv_reopen_abort(reopen_state);
2132     }
2133 
2134     QDECREF(reopen_state->explicit_options);
2135 }
2136 
2137 
2138 static void bdrv_close(BlockDriverState *bs)
2139 {
2140     BdrvAioNotifier *ban, *ban_next;
2141 
2142     assert(!bs->job);
2143 
2144     /* Disable I/O limits and drain all pending throttled requests */
2145     if (bs->throttle_state) {
2146         bdrv_io_limits_disable(bs);
2147     }
2148 
2149     bdrv_drained_begin(bs); /* complete I/O */
2150     bdrv_flush(bs);
2151     bdrv_drain(bs); /* in case flush left pending I/O */
2152 
2153     bdrv_release_named_dirty_bitmaps(bs);
2154     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2155 
2156     if (bs->blk) {
2157         blk_dev_change_media_cb(bs->blk, false);
2158     }
2159 
2160     if (bs->drv) {
2161         BdrvChild *child, *next;
2162 
2163         bs->drv->bdrv_close(bs);
2164         bs->drv = NULL;
2165 
2166         bdrv_set_backing_hd(bs, NULL);
2167 
2168         if (bs->file != NULL) {
2169             bdrv_unref_child(bs, bs->file);
2170             bs->file = NULL;
2171         }
2172 
2173         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2174             /* TODO Remove bdrv_unref() from drivers' close function and use
2175              * bdrv_unref_child() here */
2176             if (child->bs->inherits_from == bs) {
2177                 child->bs->inherits_from = NULL;
2178             }
2179             bdrv_detach_child(child);
2180         }
2181 
2182         g_free(bs->opaque);
2183         bs->opaque = NULL;
2184         bs->copy_on_read = 0;
2185         bs->backing_file[0] = '\0';
2186         bs->backing_format[0] = '\0';
2187         bs->total_sectors = 0;
2188         bs->encrypted = 0;
2189         bs->valid_key = 0;
2190         bs->sg = 0;
2191         bs->zero_beyond_eof = false;
2192         QDECREF(bs->options);
2193         QDECREF(bs->explicit_options);
2194         bs->options = NULL;
2195         QDECREF(bs->full_open_options);
2196         bs->full_open_options = NULL;
2197     }
2198 
2199     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2200         g_free(ban);
2201     }
2202     QLIST_INIT(&bs->aio_notifiers);
2203     bdrv_drained_end(bs);
2204 }
2205 
2206 void bdrv_close_all(void)
2207 {
2208     BlockDriverState *bs;
2209     AioContext *aio_context;
2210 
2211     /* Drop references from requests still in flight, such as canceled block
2212      * jobs whose AIO context has not been polled yet */
2213     bdrv_drain_all();
2214 
2215     blk_remove_all_bs();
2216     blockdev_close_all_bdrv_states();
2217 
2218     /* Cancel all block jobs */
2219     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2220         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2221             aio_context = bdrv_get_aio_context(bs);
2222 
2223             aio_context_acquire(aio_context);
2224             if (bs->job) {
2225                 block_job_cancel_sync(bs->job);
2226                 aio_context_release(aio_context);
2227                 break;
2228             }
2229             aio_context_release(aio_context);
2230         }
2231 
2232         /* All the remaining BlockDriverStates are referenced directly or
2233          * indirectly from block jobs, so there needs to be at least one BDS
2234          * directly used by a block job */
2235         assert(bs);
2236     }
2237 }
2238 
/* Remove @bs from the bdrv_states list.
 *
 * Note that bs->device_list.tqe_prev is initially null,
 * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
 * the useful invariant "bs in bdrv_states iff bs->device_list.tqe_prev" by
 * resetting it to null on remove.  */
void bdrv_device_remove(BlockDriverState *bs)
{
    QTAILQ_REMOVE(&bdrv_states, bs, device_list);
    bs->device_list.tqe_prev = NULL;
}
2248 
2249 /* make a BlockDriverState anonymous by removing from bdrv_state and
2250  * graph_bdrv_state list.
2251    Also, NULL terminate the device_name to prevent double remove */
2252 void bdrv_make_anon(BlockDriverState *bs)
2253 {
2254     /* Take care to remove bs from bdrv_states only when it's actually
2255      * in it. */
2256     if (bs->device_list.tqe_prev) {
2257         bdrv_device_remove(bs);
2258     }
2259     if (bs->node_name[0] != '\0') {
2260         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2261     }
2262     bs->node_name[0] = '\0';
2263 }
2264 
/* Fields that need to stay with the top-level BDS
 *
 * Copies copy_on_read, enable_write_cache and the dirty_bitmaps list head
 * from @bs_src to @bs_dest. @bs_src itself is left unmodified.
 *
 * NOTE(review): dirty_bitmaps is a list head that is copied by value here;
 * confirm that no list element keeps referring to the source head. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
}
2279 
/*
 * Redirect everything that points at @from so that it points at @to:
 * every parent BdrvChild link of @from is moved to @to's parent list (taking
 * a reference on @to and dropping one on @from per link), and a BlockBackend
 * attached to @from is switched over to @to.
 *
 * None of @from's parents may use the child_backing role.
 */
static void change_parent_backing_link(BlockDriverState *from,
                                       BlockDriverState *to)
{
    BdrvChild *c, *next;

    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
        assert(c->role != &child_backing);
        c->bs = to;
        QLIST_REMOVE(c, next_parent);
        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
        /* Reference the new target before releasing the old one */
        bdrv_ref(to);
        bdrv_unref(from);
    }
    if (from->blk) {
        blk_set_bs(from->blk, to);
        /* Put @to into bdrv_states at @from's position unless it is already
         * a member (a non-NULL tqe_prev marks membership) */
        if (!to->device_list.tqe_prev) {
            QTAILQ_INSERT_BEFORE(from, to, device_list);
        }
        bdrv_device_remove(from);
    }
}
2301 
/*
 * Exchange the fields handled by bdrv_move_feature_fields() between @bs_top
 * and @bs_new, and move the I/O throttling configuration of @bs_top (if it
 * has one) over to @bs_new.
 *
 * @bs_new must not have a throttle state of its own.
 */
static void swap_feature_fields(BlockDriverState *bs_top,
                                BlockDriverState *bs_new)
{
    /* Scratch space; only the fields written by bdrv_move_feature_fields()
     * are ever read back from it */
    BlockDriverState tmp;

    bdrv_move_feature_fields(&tmp, bs_top);
    bdrv_move_feature_fields(bs_top, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    assert(!bs_new->throttle_state);
    if (bs_top->throttle_state) {
        assert(bs_top->io_limits_enabled);
        /* Enable on bs_new using bs_top's group name before disabling on
         * bs_top */
        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
        bdrv_io_limits_disable(bs_top);
    }
}
2318 
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * The parents of bs_top are redirected to bs_new, the feature fields of
 * both nodes are swapped, and bs_top becomes the backing file of bs_new.
 * Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * Neither bs_top nor bs_new may have requests pending.
 *
 * This function does not create any image files.
 *
 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
 * that's what the callers commonly need. bs_new will be referenced by the old
 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
 * reference of its own, it must call bdrv_ref().
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    assert(!bdrv_requests_pending(bs_top));
    assert(!bdrv_requests_pending(bs_new));

    /* Hold an extra reference on bs_top while its parent links are moved
     * away; it is dropped again once bs_top is the backing file of bs_new */
    bdrv_ref(bs_top);
    change_parent_backing_link(bs_top, bs_new);

    /* Some fields always stay on top of the backing file chain */
    swap_feature_fields(bs_top, bs_new);

    bdrv_set_backing_hd(bs_new, bs_top);
    bdrv_unref(bs_top);

    /* bs_new is now referenced by its new parents, we don't need the
     * additional reference any more. */
    bdrv_unref(bs_new);
}
2353 
/*
 * Make @new take the place of @old: the parents (and a BlockBackend, if
 * any) of @old are redirected to @new. If @new has no backing file yet and
 * is not already part of @old's backing chain, it also takes over @old's
 * backing file.
 *
 * Neither node may have requests pending. If @old is attached to a
 * BlockBackend, @new must not be attached to one.
 */
void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
{
    assert(!bdrv_requests_pending(old));
    assert(!bdrv_requests_pending(new));

    /* Extra reference on old while its parent links are being moved away;
     * dropped at the end of the function */
    bdrv_ref(old);

    if (old->blk) {
        /* As long as these fields aren't in BlockBackend, but in the top-level
         * BlockDriverState, it's not possible for a BDS to have two BBs.
         *
         * We really want to copy the fields from old to new, but we go for a
         * swap instead so that pointers aren't duplicated and cause trouble.
         * (Also, bdrv_swap() used to do the same.) */
        assert(!new->blk);
        swap_feature_fields(old, new);
    }
    change_parent_backing_link(old, new);

    /* Change backing files if a previously independent node is added to the
     * chain. For active commit, we replace top by its own (indirect) backing
     * file and don't do anything here so we don't build a loop. */
    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
        bdrv_set_backing_hd(new, backing_bs(old));
        bdrv_set_backing_hd(old, NULL);
    }

    bdrv_unref(old);
}
2383 
/*
 * Close @bs, remove it from all global lists and free it.
 *
 * @bs must have no block job, no op blockers and a reference count of zero.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);

    g_free(bs);
}
2399 
2400 /*
2401  * Run consistency checks on an image
2402  *
2403  * Returns 0 if the check could be completed (it doesn't mean that the image is
2404  * free of errors) or -errno when an internal error occurred. The results of the
2405  * check are stored in res.
2406  */
2407 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2408 {
2409     if (bs->drv == NULL) {
2410         return -ENOMEDIUM;
2411     }
2412     if (bs->drv->bdrv_check == NULL) {
2413         return -ENOTSUP;
2414     }
2415 
2416     memset(res, 0, sizeof(*res));
2417     return bs->drv->bdrv_check(bs, res, fix);
2418 }
2419 
2420 #define COMMIT_BUF_SECTORS 2048
2421 
/*
 * commit COW file into the raw image
 *
 * Copies every allocated range of @bs into its backing file, growing the
 * backing file first if @bs is larger. A read-only backing file is reopened
 * read-write for the duration of the operation and put back to read-only
 * afterwards. If the driver implements bdrv_make_empty(), @bs is emptied
 * once the copy has succeeded.
 *
 * Returns 0 on success, -ENOMEDIUM if @bs has no driver, -ENOTSUP if it has
 * no backing file, -EBUSY if the commit is blocked on either node, -EACCES
 * if the backing file cannot be reopened read-write, -ENOMEM if the bounce
 * buffer cannot be allocated, or the negative error of the failing
 * length/truncate/read/write/make_empty call.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's state so it can be restored on exit */
    ro = bs->backing->bs->read_only;
    open_flags =  bs->backing->bs->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing->bs);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing->bs, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing->bs as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* n is set by bdrv_is_allocated() to the length of the current run of
     * sectors, so every iteration advances by one run */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing->bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        /* NOTE(review): the result of this flush is not checked */
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     * NOTE(review): the result of this flush is not checked either, so a
     * failed flush still ends in a return value of 0.
     */
    if (bs->backing) {
        bdrv_flush(bs->backing->bs);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2529 
2530 int bdrv_commit_all(void)
2531 {
2532     BlockDriverState *bs;
2533 
2534     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2535         AioContext *aio_context = bdrv_get_aio_context(bs);
2536 
2537         aio_context_acquire(aio_context);
2538         if (bs->drv && bs->backing) {
2539             int ret = bdrv_commit(bs);
2540             if (ret < 0) {
2541                 aio_context_release(aio_context);
2542                 return ret;
2543             }
2544         }
2545         aio_context_release(aio_context);
2546     }
2547     return 0;
2548 }
2549 
2550 /*
2551  * Return values:
2552  * 0        - success
2553  * -EINVAL  - backing format specified, but no file
2554  * -ENOSPC  - can't update the backing file because no space is left in the
2555  *            image file header
2556  * -ENOTSUP - format driver doesn't support changing the backing file
2557  */
2558 int bdrv_change_backing_file(BlockDriverState *bs,
2559     const char *backing_file, const char *backing_fmt)
2560 {
2561     BlockDriver *drv = bs->drv;
2562     int ret;
2563 
2564     /* Backing file format doesn't make sense without a backing file */
2565     if (backing_fmt && !backing_file) {
2566         return -EINVAL;
2567     }
2568 
2569     if (drv->bdrv_change_backing_file != NULL) {
2570         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2571     } else {
2572         ret = -ENOTSUP;
2573     }
2574 
2575     if (ret == 0) {
2576         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2577         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2578     }
2579     return ret;
2580 }
2581 
2582 /*
2583  * Finds the image layer in the chain that has 'bs' as its backing file.
2584  *
2585  * active is the current topmost image.
2586  *
2587  * Returns NULL if bs is not found in active's image chain,
2588  * or if active == bs.
2589  *
2590  * Returns the bottommost base image if bs == NULL.
2591  */
2592 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2593                                     BlockDriverState *bs)
2594 {
2595     while (active && bs != backing_bs(active)) {
2596         active = backing_bs(active);
2597     }
2598 
2599     return active;
2600 }
2601 
/* Given a BDS, searches for the base layer, i.e. the bottommost image of
 * its backing chain (the layer whose own backing file is NULL). */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    return bdrv_find_overlay(bs, NULL);
}
2607 
2608 /*
2609  * Drops images above 'base' up to and including 'top', and sets the image
2610  * above 'top' to have base as its backing file.
2611  *
2612  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2613  * information in 'bs' can be properly updated.
2614  *
2615  * E.g., this will convert the following chain:
2616  * bottom <- base <- intermediate <- top <- active
2617  *
2618  * to
2619  *
2620  * bottom <- base <- active
2621  *
2622  * It is allowed for bottom==base, in which case it converts:
2623  *
2624  * base <- intermediate <- top <- active
2625  *
2626  * to
2627  *
2628  * base <- active
2629  *
2630  * If backing_file_str is non-NULL, it will be used when modifying top's
2631  * overlay image metadata.
2632  *
2633  * Error conditions:
2634  *  if active == top, that is considered an error
2635  *
2636  */
2637 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2638                            BlockDriverState *base, const char *backing_file_str)
2639 {
2640     BlockDriverState *new_top_bs = NULL;
2641     int ret = -EIO;
2642 
2643     if (!top->drv || !base->drv) {
2644         goto exit;
2645     }
2646 
2647     new_top_bs = bdrv_find_overlay(active, top);
2648 
2649     if (new_top_bs == NULL) {
2650         /* we could not find the image above 'top', this is an error */
2651         goto exit;
2652     }
2653 
2654     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2655      * to do, no intermediate images */
2656     if (backing_bs(new_top_bs) == base) {
2657         ret = 0;
2658         goto exit;
2659     }
2660 
2661     /* Make sure that base is in the backing chain of top */
2662     if (!bdrv_chain_contains(top, base)) {
2663         goto exit;
2664     }
2665 
2666     /* success - we can delete the intermediate states, and link top->base */
2667     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2668     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2669                                    base->drv ? base->drv->format_name : "");
2670     if (ret) {
2671         goto exit;
2672     }
2673     bdrv_set_backing_hd(new_top_bs, base);
2674 
2675     ret = 0;
2676 exit:
2677     return ret;
2678 }
2679 
2680 /**
2681  * Truncate file to 'offset' bytes (needed only for file protocols)
2682  */
2683 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2684 {
2685     BlockDriver *drv = bs->drv;
2686     int ret;
2687     if (!drv)
2688         return -ENOMEDIUM;
2689     if (!drv->bdrv_truncate)
2690         return -ENOTSUP;
2691     if (bs->read_only)
2692         return -EACCES;
2693 
2694     ret = drv->bdrv_truncate(bs, offset);
2695     if (ret == 0) {
2696         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2697         bdrv_dirty_bitmap_truncate(bs);
2698         if (bs->blk) {
2699             blk_dev_resize_cb(bs->blk);
2700         }
2701     }
2702     return ret;
2703 }
2704 
2705 /**
2706  * Length of a allocated file in bytes. Sparse files are counted by actual
2707  * allocated space. Return < 0 if error or unknown.
2708  */
2709 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2710 {
2711     BlockDriver *drv = bs->drv;
2712     if (!drv) {
2713         return -ENOMEDIUM;
2714     }
2715     if (drv->bdrv_get_allocated_file_size) {
2716         return drv->bdrv_get_allocated_file_size(bs);
2717     }
2718     if (bs->file) {
2719         return bdrv_get_allocated_file_size(bs->file->bs);
2720     }
2721     return -ENOTSUP;
2722 }
2723 
2724 /**
2725  * Return number of sectors on success, -errno on error.
2726  */
2727 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2728 {
2729     BlockDriver *drv = bs->drv;
2730 
2731     if (!drv)
2732         return -ENOMEDIUM;
2733 
2734     if (drv->has_variable_length) {
2735         int ret = refresh_total_sectors(bs, bs->total_sectors);
2736         if (ret < 0) {
2737             return ret;
2738         }
2739     }
2740     return bs->total_sectors;
2741 }
2742 
2743 /**
2744  * Return length in bytes on success, -errno on error.
2745  * The length is always a multiple of BDRV_SECTOR_SIZE.
2746  */
2747 int64_t bdrv_getlength(BlockDriverState *bs)
2748 {
2749     int64_t ret = bdrv_nb_sectors(bs);
2750 
2751     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2752     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2753 }
2754 
2755 /* return 0 as number of sectors if no device present or error */
2756 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2757 {
2758     int64_t nb_sectors = bdrv_nb_sectors(bs);
2759 
2760     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2761 }
2762 
/* Return the read_only field of @bs */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2767 
/* Return the sg field of @bs */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2772 
/* Return the enable_write_cache field of @bs (a getter, despite the name;
 * the setter is bdrv_set_enable_write_cache()) */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
2777 
2778 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2779 {
2780     bs->enable_write_cache = wce;
2781 
2782     /* so a reopen() will preserve wce */
2783     if (wce) {
2784         bs->open_flags |= BDRV_O_CACHE_WB;
2785     } else {
2786         bs->open_flags &= ~BDRV_O_CACHE_WB;
2787     }
2788 }
2789 
2790 int bdrv_is_encrypted(BlockDriverState *bs)
2791 {
2792     if (bs->backing && bs->backing->bs->encrypted) {
2793         return 1;
2794     }
2795     return bs->encrypted;
2796 }
2797 
2798 int bdrv_key_required(BlockDriverState *bs)
2799 {
2800     BdrvChild *backing = bs->backing;
2801 
2802     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2803         return 1;
2804     }
2805     return (bs->encrypted && !bs->valid_key);
2806 }
2807 
2808 int bdrv_set_key(BlockDriverState *bs, const char *key)
2809 {
2810     int ret;
2811     if (bs->backing && bs->backing->bs->encrypted) {
2812         ret = bdrv_set_key(bs->backing->bs, key);
2813         if (ret < 0)
2814             return ret;
2815         if (!bs->encrypted)
2816             return 0;
2817     }
2818     if (!bs->encrypted) {
2819         return -EINVAL;
2820     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2821         return -ENOMEDIUM;
2822     }
2823     ret = bs->drv->bdrv_set_key(bs, key);
2824     if (ret < 0) {
2825         bs->valid_key = 0;
2826     } else if (!bs->valid_key) {
2827         bs->valid_key = 1;
2828         if (bs->blk) {
2829             /* call the change callback now, we skipped it on open */
2830             blk_dev_change_media_cb(bs->blk, true);
2831         }
2832     }
2833     return ret;
2834 }
2835 
2836 /*
2837  * Provide an encryption key for @bs.
2838  * If @key is non-null:
2839  *     If @bs is not encrypted, fail.
2840  *     Else if the key is invalid, fail.
2841  *     Else set @bs's key to @key, replacing the existing key, if any.
2842  * If @key is null:
2843  *     If @bs is encrypted and still lacks a key, fail.
2844  *     Else do nothing.
2845  * On failure, store an error object through @errp if non-null.
2846  */
2847 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2848 {
2849     if (key) {
2850         if (!bdrv_is_encrypted(bs)) {
2851             error_setg(errp, "Node '%s' is not encrypted",
2852                       bdrv_get_device_or_node_name(bs));
2853         } else if (bdrv_set_key(bs, key) < 0) {
2854             error_setg(errp, QERR_INVALID_PASSWORD);
2855         }
2856     } else {
2857         if (bdrv_key_required(bs)) {
2858             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2859                       "'%s' (%s) is encrypted",
2860                       bdrv_get_device_or_node_name(bs),
2861                       bdrv_get_encrypted_filename(bs));
2862         }
2863     }
2864 }
2865 
2866 const char *bdrv_get_format_name(BlockDriverState *bs)
2867 {
2868     return bs->drv ? bs->drv->format_name : NULL;
2869 }
2870 
/*
 * qsort() comparator for an array of C strings ('const char *' elements).
 *
 * qsort() passes pointers to the array *elements*, so @a and @b are really
 * 'const char *const *'.  Each one must be dereferenced once before being
 * handed to strcmp(); comparing the element addresses themselves would
 * order the array by raw pointer bytes instead of by string contents.
 *
 * Returns a value less than, equal to or greater than zero, as strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2875 
2876 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2877                          void *opaque)
2878 {
2879     BlockDriver *drv;
2880     int count = 0;
2881     int i;
2882     const char **formats = NULL;
2883 
2884     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2885         if (drv->format_name) {
2886             bool found = false;
2887             int i = count;
2888             while (formats && i && !found) {
2889                 found = !strcmp(formats[--i], drv->format_name);
2890             }
2891 
2892             if (!found) {
2893                 formats = g_renew(const char *, formats, count + 1);
2894                 formats[count++] = drv->format_name;
2895             }
2896         }
2897     }
2898 
2899     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2900 
2901     for (i = 0; i < count; i++) {
2902         it(opaque, formats[i]);
2903     }
2904 
2905     g_free(formats);
2906 }
2907 
2908 /* This function is to find a node in the bs graph */
2909 BlockDriverState *bdrv_find_node(const char *node_name)
2910 {
2911     BlockDriverState *bs;
2912 
2913     assert(node_name);
2914 
2915     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2916         if (!strcmp(node_name, bs->node_name)) {
2917             return bs;
2918         }
2919     }
2920     return NULL;
2921 }
2922 
2923 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2924 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2925 {
2926     BlockDeviceInfoList *list, *entry;
2927     BlockDriverState *bs;
2928 
2929     list = NULL;
2930     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2931         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2932         if (!info) {
2933             qapi_free_BlockDeviceInfoList(list);
2934             return NULL;
2935         }
2936         entry = g_malloc0(sizeof(*entry));
2937         entry->value = info;
2938         entry->next = list;
2939         list = entry;
2940     }
2941 
2942     return list;
2943 }
2944 
2945 BlockDriverState *bdrv_lookup_bs(const char *device,
2946                                  const char *node_name,
2947                                  Error **errp)
2948 {
2949     BlockBackend *blk;
2950     BlockDriverState *bs;
2951 
2952     if (device) {
2953         blk = blk_by_name(device);
2954 
2955         if (blk) {
2956             bs = blk_bs(blk);
2957             if (!bs) {
2958                 error_setg(errp, "Device '%s' has no medium", device);
2959             }
2960 
2961             return bs;
2962         }
2963     }
2964 
2965     if (node_name) {
2966         bs = bdrv_find_node(node_name);
2967 
2968         if (bs) {
2969             return bs;
2970         }
2971     }
2972 
2973     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2974                      device ? device : "",
2975                      node_name ? node_name : "");
2976     return NULL;
2977 }
2978 
2979 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2980  * return false.  If either argument is NULL, return false. */
2981 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2982 {
2983     while (top && top != base) {
2984         top = backing_bs(top);
2985     }
2986 
2987     return top != NULL;
2988 }
2989 
2990 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2991 {
2992     if (!bs) {
2993         return QTAILQ_FIRST(&graph_bdrv_states);
2994     }
2995     return QTAILQ_NEXT(bs, node_list);
2996 }
2997 
2998 BlockDriverState *bdrv_next(BlockDriverState *bs)
2999 {
3000     if (!bs) {
3001         return QTAILQ_FIRST(&bdrv_states);
3002     }
3003     return QTAILQ_NEXT(bs, device_list);
3004 }
3005 
3006 const char *bdrv_get_node_name(const BlockDriverState *bs)
3007 {
3008     return bs->node_name;
3009 }
3010 
3011 /* TODO check what callers really want: bs->node_name or blk_name() */
3012 const char *bdrv_get_device_name(const BlockDriverState *bs)
3013 {
3014     return bs->blk ? blk_name(bs->blk) : "";
3015 }
3016 
3017 /* This can be used to identify nodes that might not have a device
3018  * name associated. Since node and device names live in the same
3019  * namespace, the result is unambiguous. The exception is if both are
3020  * absent, then this returns an empty (non-null) string. */
3021 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3022 {
3023     return bs->blk ? blk_name(bs->blk) : bs->node_name;
3024 }
3025 
3026 int bdrv_get_flags(BlockDriverState *bs)
3027 {
3028     return bs->open_flags;
3029 }
3030 
3031 int bdrv_has_zero_init_1(BlockDriverState *bs)
3032 {
3033     return 1;
3034 }
3035 
3036 int bdrv_has_zero_init(BlockDriverState *bs)
3037 {
3038     assert(bs->drv);
3039 
3040     /* If BS is a copy on write image, it is initialized to
3041        the contents of the base image, which may not be zeroes.  */
3042     if (bs->backing) {
3043         return 0;
3044     }
3045     if (bs->drv->bdrv_has_zero_init) {
3046         return bs->drv->bdrv_has_zero_init(bs);
3047     }
3048 
3049     /* safe default */
3050     return 0;
3051 }
3052 
3053 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3054 {
3055     BlockDriverInfo bdi;
3056 
3057     if (bs->backing) {
3058         return false;
3059     }
3060 
3061     if (bdrv_get_info(bs, &bdi) == 0) {
3062         return bdi.unallocated_blocks_are_zero;
3063     }
3064 
3065     return false;
3066 }
3067 
3068 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3069 {
3070     BlockDriverInfo bdi;
3071 
3072     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3073         return false;
3074     }
3075 
3076     if (bdrv_get_info(bs, &bdi) == 0) {
3077         return bdi.can_write_zeroes_with_unmap;
3078     }
3079 
3080     return false;
3081 }
3082 
3083 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3084 {
3085     if (bs->backing && bs->backing->bs->encrypted)
3086         return bs->backing_file;
3087     else if (bs->encrypted)
3088         return bs->filename;
3089     else
3090         return NULL;
3091 }
3092 
3093 void bdrv_get_backing_filename(BlockDriverState *bs,
3094                                char *filename, int filename_size)
3095 {
3096     pstrcpy(filename, filename_size, bs->backing_file);
3097 }
3098 
3099 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3100 {
3101     BlockDriver *drv = bs->drv;
3102     if (!drv)
3103         return -ENOMEDIUM;
3104     if (!drv->bdrv_get_info)
3105         return -ENOTSUP;
3106     memset(bdi, 0, sizeof(*bdi));
3107     return drv->bdrv_get_info(bs, bdi);
3108 }
3109 
3110 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3111 {
3112     BlockDriver *drv = bs->drv;
3113     if (drv && drv->bdrv_get_specific_info) {
3114         return drv->bdrv_get_specific_info(bs);
3115     }
3116     return NULL;
3117 }
3118 
3119 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3120 {
3121     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3122         return;
3123     }
3124 
3125     bs->drv->bdrv_debug_event(bs, event);
3126 }
3127 
3128 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3129                           const char *tag)
3130 {
3131     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3132         bs = bs->file ? bs->file->bs : NULL;
3133     }
3134 
3135     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3136         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3137     }
3138 
3139     return -ENOTSUP;
3140 }
3141 
3142 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3143 {
3144     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3145         bs = bs->file ? bs->file->bs : NULL;
3146     }
3147 
3148     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3149         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3150     }
3151 
3152     return -ENOTSUP;
3153 }
3154 
3155 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3156 {
3157     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3158         bs = bs->file ? bs->file->bs : NULL;
3159     }
3160 
3161     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3162         return bs->drv->bdrv_debug_resume(bs, tag);
3163     }
3164 
3165     return -ENOTSUP;
3166 }
3167 
3168 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3169 {
3170     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3171         bs = bs->file ? bs->file->bs : NULL;
3172     }
3173 
3174     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3175         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3176     }
3177 
3178     return false;
3179 }
3180 
3181 int bdrv_is_snapshot(BlockDriverState *bs)
3182 {
3183     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3184 }
3185 
3186 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3187  * relative, it must be relative to the chain.  So, passing in bs->filename
3188  * from a BDS as backing_file should not be done, as that may be relative to
3189  * the CWD rather than the chain. */
3190 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3191         const char *backing_file)
3192 {
3193     char *filename_full = NULL;
3194     char *backing_file_full = NULL;
3195     char *filename_tmp = NULL;
3196     int is_protocol = 0;
3197     BlockDriverState *curr_bs = NULL;
3198     BlockDriverState *retval = NULL;
3199 
3200     if (!bs || !bs->drv || !backing_file) {
3201         return NULL;
3202     }
3203 
3204     filename_full     = g_malloc(PATH_MAX);
3205     backing_file_full = g_malloc(PATH_MAX);
3206     filename_tmp      = g_malloc(PATH_MAX);
3207 
3208     is_protocol = path_has_protocol(backing_file);
3209 
3210     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3211 
3212         /* If either of the filename paths is actually a protocol, then
3213          * compare unmodified paths; otherwise make paths relative */
3214         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3215             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3216                 retval = curr_bs->backing->bs;
3217                 break;
3218             }
3219         } else {
3220             /* If not an absolute filename path, make it relative to the current
3221              * image's filename path */
3222             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3223                          backing_file);
3224 
3225             /* We are going to compare absolute pathnames */
3226             if (!realpath(filename_tmp, filename_full)) {
3227                 continue;
3228             }
3229 
3230             /* We need to make sure the backing filename we are comparing against
3231              * is relative to the current image filename (or absolute) */
3232             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3233                          curr_bs->backing_file);
3234 
3235             if (!realpath(filename_tmp, backing_file_full)) {
3236                 continue;
3237             }
3238 
3239             if (strcmp(backing_file_full, filename_full) == 0) {
3240                 retval = curr_bs->backing->bs;
3241                 break;
3242             }
3243         }
3244     }
3245 
3246     g_free(filename_full);
3247     g_free(backing_file_full);
3248     g_free(filename_tmp);
3249     return retval;
3250 }
3251 
3252 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3253 {
3254     if (!bs->drv) {
3255         return 0;
3256     }
3257 
3258     if (!bs->backing) {
3259         return 0;
3260     }
3261 
3262     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3263 }
3264 
3265 void bdrv_init(void)
3266 {
3267     module_call_init(MODULE_INIT_BLOCK);
3268 }
3269 
3270 void bdrv_init_with_whitelist(void)
3271 {
3272     use_bdrv_whitelist = 1;
3273     bdrv_init();
3274 }
3275 
3276 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3277 {
3278     Error *local_err = NULL;
3279     int ret;
3280 
3281     if (!bs->drv)  {
3282         return;
3283     }
3284 
3285     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3286         return;
3287     }
3288     bs->open_flags &= ~BDRV_O_INACTIVE;
3289 
3290     if (bs->drv->bdrv_invalidate_cache) {
3291         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3292     } else if (bs->file) {
3293         bdrv_invalidate_cache(bs->file->bs, &local_err);
3294     }
3295     if (local_err) {
3296         bs->open_flags |= BDRV_O_INACTIVE;
3297         error_propagate(errp, local_err);
3298         return;
3299     }
3300 
3301     ret = refresh_total_sectors(bs, bs->total_sectors);
3302     if (ret < 0) {
3303         bs->open_flags |= BDRV_O_INACTIVE;
3304         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3305         return;
3306     }
3307 }
3308 
3309 void bdrv_invalidate_cache_all(Error **errp)
3310 {
3311     BlockDriverState *bs;
3312     Error *local_err = NULL;
3313 
3314     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3315         AioContext *aio_context = bdrv_get_aio_context(bs);
3316 
3317         aio_context_acquire(aio_context);
3318         bdrv_invalidate_cache(bs, &local_err);
3319         aio_context_release(aio_context);
3320         if (local_err) {
3321             error_propagate(errp, local_err);
3322             return;
3323         }
3324     }
3325 }
3326 
3327 static int bdrv_inactivate(BlockDriverState *bs)
3328 {
3329     int ret;
3330 
3331     if (bs->drv->bdrv_inactivate) {
3332         ret = bs->drv->bdrv_inactivate(bs);
3333         if (ret < 0) {
3334             return ret;
3335         }
3336     }
3337 
3338     bs->open_flags |= BDRV_O_INACTIVE;
3339     return 0;
3340 }
3341 
3342 int bdrv_inactivate_all(void)
3343 {
3344     BlockDriverState *bs;
3345     int ret;
3346 
3347     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3348         AioContext *aio_context = bdrv_get_aio_context(bs);
3349 
3350         aio_context_acquire(aio_context);
3351         ret = bdrv_inactivate(bs);
3352         aio_context_release(aio_context);
3353         if (ret < 0) {
3354             return ret;
3355         }
3356     }
3357 
3358     return 0;
3359 }
3360 
3361 /**************************************************************/
3362 /* removable device support */
3363 
3364 /**
3365  * Return TRUE if the media is present
3366  */
3367 bool bdrv_is_inserted(BlockDriverState *bs)
3368 {
3369     BlockDriver *drv = bs->drv;
3370     BdrvChild *child;
3371 
3372     if (!drv) {
3373         return false;
3374     }
3375     if (drv->bdrv_is_inserted) {
3376         return drv->bdrv_is_inserted(bs);
3377     }
3378     QLIST_FOREACH(child, &bs->children, next) {
3379         if (!bdrv_is_inserted(child->bs)) {
3380             return false;
3381         }
3382     }
3383     return true;
3384 }
3385 
3386 /**
3387  * Return whether the media changed since the last call to this
3388  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3389  */
3390 int bdrv_media_changed(BlockDriverState *bs)
3391 {
3392     BlockDriver *drv = bs->drv;
3393 
3394     if (drv && drv->bdrv_media_changed) {
3395         return drv->bdrv_media_changed(bs);
3396     }
3397     return -ENOTSUP;
3398 }
3399 
3400 /**
3401  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3402  */
3403 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3404 {
3405     BlockDriver *drv = bs->drv;
3406     const char *device_name;
3407 
3408     if (drv && drv->bdrv_eject) {
3409         drv->bdrv_eject(bs, eject_flag);
3410     }
3411 
3412     device_name = bdrv_get_device_name(bs);
3413     if (device_name[0] != '\0') {
3414         qapi_event_send_device_tray_moved(device_name,
3415                                           eject_flag, &error_abort);
3416     }
3417 }
3418 
3419 /**
3420  * Lock or unlock the media (if it is locked, the user won't be able
3421  * to eject it manually).
3422  */
3423 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3424 {
3425     BlockDriver *drv = bs->drv;
3426 
3427     trace_bdrv_lock_medium(bs, locked);
3428 
3429     if (drv && drv->bdrv_lock_medium) {
3430         drv->bdrv_lock_medium(bs, locked);
3431     }
3432 }
3433 
3434 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3435 {
3436     BdrvDirtyBitmap *bm;
3437 
3438     assert(name);
3439     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3440         if (bm->name && !strcmp(name, bm->name)) {
3441             return bm;
3442         }
3443     }
3444     return NULL;
3445 }
3446 
3447 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3448 {
3449     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3450     g_free(bitmap->name);
3451     bitmap->name = NULL;
3452 }
3453 
3454 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3455                                           uint32_t granularity,
3456                                           const char *name,
3457                                           Error **errp)
3458 {
3459     int64_t bitmap_size;
3460     BdrvDirtyBitmap *bitmap;
3461     uint32_t sector_granularity;
3462 
3463     assert((granularity & (granularity - 1)) == 0);
3464 
3465     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3466         error_setg(errp, "Bitmap already exists: %s", name);
3467         return NULL;
3468     }
3469     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3470     assert(sector_granularity);
3471     bitmap_size = bdrv_nb_sectors(bs);
3472     if (bitmap_size < 0) {
3473         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3474         errno = -bitmap_size;
3475         return NULL;
3476     }
3477     bitmap = g_new0(BdrvDirtyBitmap, 1);
3478     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3479     bitmap->size = bitmap_size;
3480     bitmap->name = g_strdup(name);
3481     bitmap->disabled = false;
3482     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3483     return bitmap;
3484 }
3485 
3486 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3487 {
3488     return bitmap->successor;
3489 }
3490 
3491 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3492 {
3493     return !(bitmap->disabled || bitmap->successor);
3494 }
3495 
3496 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3497 {
3498     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3499         return DIRTY_BITMAP_STATUS_FROZEN;
3500     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3501         return DIRTY_BITMAP_STATUS_DISABLED;
3502     } else {
3503         return DIRTY_BITMAP_STATUS_ACTIVE;
3504     }
3505 }
3506 
3507 /**
3508  * Create a successor bitmap destined to replace this bitmap after an operation.
3509  * Requires that the bitmap is not frozen and has no successor.
3510  */
3511 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3512                                        BdrvDirtyBitmap *bitmap, Error **errp)
3513 {
3514     uint64_t granularity;
3515     BdrvDirtyBitmap *child;
3516 
3517     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3518         error_setg(errp, "Cannot create a successor for a bitmap that is "
3519                    "currently frozen");
3520         return -1;
3521     }
3522     assert(!bitmap->successor);
3523 
3524     /* Create an anonymous successor */
3525     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3526     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3527     if (!child) {
3528         return -1;
3529     }
3530 
3531     /* Successor will be on or off based on our current state. */
3532     child->disabled = bitmap->disabled;
3533 
3534     /* Install the successor and freeze the parent */
3535     bitmap->successor = child;
3536     return 0;
3537 }
3538 
3539 /**
3540  * For a bitmap with a successor, yield our name to the successor,
3541  * delete the old bitmap, and return a handle to the new bitmap.
3542  */
3543 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3544                                             BdrvDirtyBitmap *bitmap,
3545                                             Error **errp)
3546 {
3547     char *name;
3548     BdrvDirtyBitmap *successor = bitmap->successor;
3549 
3550     if (successor == NULL) {
3551         error_setg(errp, "Cannot relinquish control if "
3552                    "there's no successor present");
3553         return NULL;
3554     }
3555 
3556     name = bitmap->name;
3557     bitmap->name = NULL;
3558     successor->name = name;
3559     bitmap->successor = NULL;
3560     bdrv_release_dirty_bitmap(bs, bitmap);
3561 
3562     return successor;
3563 }
3564 
3565 /**
3566  * In cases of failure where we can no longer safely delete the parent,
3567  * we may wish to re-join the parent and child/successor.
3568  * The merged parent will be un-frozen, but not explicitly re-enabled.
3569  */
3570 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3571                                            BdrvDirtyBitmap *parent,
3572                                            Error **errp)
3573 {
3574     BdrvDirtyBitmap *successor = parent->successor;
3575 
3576     if (!successor) {
3577         error_setg(errp, "Cannot reclaim a successor when none is present");
3578         return NULL;
3579     }
3580 
3581     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3582         error_setg(errp, "Merging of parent and successor bitmap failed");
3583         return NULL;
3584     }
3585     bdrv_release_dirty_bitmap(bs, successor);
3586     parent->successor = NULL;
3587 
3588     return parent;
3589 }
3590 
3591 /**
3592  * Truncates _all_ bitmaps attached to a BDS.
3593  */
3594 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3595 {
3596     BdrvDirtyBitmap *bitmap;
3597     uint64_t size = bdrv_nb_sectors(bs);
3598 
3599     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3600         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3601         hbitmap_truncate(bitmap->bitmap, size);
3602         bitmap->size = size;
3603     }
3604 }
3605 
3606 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
3607                                                   BdrvDirtyBitmap *bitmap,
3608                                                   bool only_named)
3609 {
3610     BdrvDirtyBitmap *bm, *next;
3611     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3612         if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
3613             assert(!bdrv_dirty_bitmap_frozen(bm));
3614             QLIST_REMOVE(bm, list);
3615             hbitmap_free(bm->bitmap);
3616             g_free(bm->name);
3617             g_free(bm);
3618 
3619             if (bitmap) {
3620                 return;
3621             }
3622         }
3623     }
3624 }
3625 
3626 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3627 {
3628     bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
3629 }
3630 
3631 /**
3632  * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
3633  * There must not be any frozen bitmaps attached.
3634  */
3635 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
3636 {
3637     bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
3638 }
3639 
3640 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3641 {
3642     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3643     bitmap->disabled = true;
3644 }
3645 
3646 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3647 {
3648     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3649     bitmap->disabled = false;
3650 }
3651 
3652 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3653 {
3654     BdrvDirtyBitmap *bm;
3655     BlockDirtyInfoList *list = NULL;
3656     BlockDirtyInfoList **plist = &list;
3657 
3658     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3659         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3660         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3661         info->count = bdrv_get_dirty_count(bm);
3662         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3663         info->has_name = !!bm->name;
3664         info->name = g_strdup(bm->name);
3665         info->status = bdrv_dirty_bitmap_status(bm);
3666         entry->value = info;
3667         *plist = entry;
3668         plist = &entry->next;
3669     }
3670 
3671     return list;
3672 }
3673 
3674 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3675 {
3676     if (bitmap) {
3677         return hbitmap_get(bitmap->bitmap, sector);
3678     } else {
3679         return 0;
3680     }
3681 }
3682 
3683 /**
3684  * Chooses a default granularity based on the existing cluster size,
3685  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3686  * is no cluster size information available.
3687  */
3688 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3689 {
3690     BlockDriverInfo bdi;
3691     uint32_t granularity;
3692 
3693     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3694         granularity = MAX(4096, bdi.cluster_size);
3695         granularity = MIN(65536, granularity);
3696     } else {
3697         granularity = 65536;
3698     }
3699 
3700     return granularity;
3701 }
3702 
3703 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3704 {
3705     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3706 }
3707 
3708 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3709 {
3710     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3711 }
3712 
3713 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3714                            int64_t cur_sector, int nr_sectors)
3715 {
3716     assert(bdrv_dirty_bitmap_enabled(bitmap));
3717     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3718 }
3719 
3720 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3721                              int64_t cur_sector, int nr_sectors)
3722 {
3723     assert(bdrv_dirty_bitmap_enabled(bitmap));
3724     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3725 }
3726 
3727 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3728 {
3729     assert(bdrv_dirty_bitmap_enabled(bitmap));
3730     if (!out) {
3731         hbitmap_reset_all(bitmap->bitmap);
3732     } else {
3733         HBitmap *backup = bitmap->bitmap;
3734         bitmap->bitmap = hbitmap_alloc(bitmap->size,
3735                                        hbitmap_granularity(backup));
3736         *out = backup;
3737     }
3738 }
3739 
3740 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3741 {
3742     HBitmap *tmp = bitmap->bitmap;
3743     assert(bdrv_dirty_bitmap_enabled(bitmap));
3744     bitmap->bitmap = in;
3745     hbitmap_free(tmp);
3746 }
3747 
3748 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3749                     int nr_sectors)
3750 {
3751     BdrvDirtyBitmap *bitmap;
3752     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3753         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3754             continue;
3755         }
3756         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3757     }
3758 }
3759 
3760 /**
3761  * Advance an HBitmapIter to an arbitrary offset.
3762  */
3763 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3764 {
3765     assert(hbi->hb);
3766     hbitmap_iter_init(hbi, hbi->hb, offset);
3767 }
3768 
3769 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3770 {
3771     return hbitmap_count(bitmap->bitmap);
3772 }
3773 
3774 /* Get a reference to bs */
3775 void bdrv_ref(BlockDriverState *bs)
3776 {
3777     bs->refcnt++;
3778 }
3779 
3780 /* Release a previously grabbed reference to bs.
3781  * If after releasing, reference count is zero, the BlockDriverState is
3782  * deleted. */
3783 void bdrv_unref(BlockDriverState *bs)
3784 {
3785     if (!bs) {
3786         return;
3787     }
3788     assert(bs->refcnt > 0);
3789     if (--bs->refcnt == 0) {
3790         bdrv_delete(bs);
3791     }
3792 }
3793 
3794 struct BdrvOpBlocker {
3795     Error *reason;
3796     QLIST_ENTRY(BdrvOpBlocker) list;
3797 };
3798 
3799 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3800 {
3801     BdrvOpBlocker *blocker;
3802     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3803     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3804         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3805         if (errp) {
3806             *errp = error_copy(blocker->reason);
3807             error_prepend(errp, "Node '%s' is busy: ",
3808                           bdrv_get_device_or_node_name(bs));
3809         }
3810         return true;
3811     }
3812     return false;
3813 }
3814 
3815 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3816 {
3817     BdrvOpBlocker *blocker;
3818     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3819 
3820     blocker = g_new0(BdrvOpBlocker, 1);
3821     blocker->reason = reason;
3822     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3823 }
3824 
3825 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3826 {
3827     BdrvOpBlocker *blocker, *next;
3828     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3829     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3830         if (blocker->reason == reason) {
3831             QLIST_REMOVE(blocker, list);
3832             g_free(blocker);
3833         }
3834     }
3835 }
3836 
3837 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3838 {
3839     int i;
3840     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3841         bdrv_op_block(bs, i, reason);
3842     }
3843 }
3844 
3845 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3846 {
3847     int i;
3848     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3849         bdrv_op_unblock(bs, i, reason);
3850     }
3851 }
3852 
3853 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3854 {
3855     int i;
3856 
3857     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3858         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3859             return false;
3860         }
3861     }
3862     return true;
3863 }
3864 
3865 void bdrv_img_create(const char *filename, const char *fmt,
3866                      const char *base_filename, const char *base_fmt,
3867                      char *options, uint64_t img_size, int flags,
3868                      Error **errp, bool quiet)
3869 {
3870     QemuOptsList *create_opts = NULL;
3871     QemuOpts *opts = NULL;
3872     const char *backing_fmt, *backing_file;
3873     int64_t size;
3874     BlockDriver *drv, *proto_drv;
3875     Error *local_err = NULL;
3876     int ret = 0;
3877 
3878     /* Find driver and parse its options */
3879     drv = bdrv_find_format(fmt);
3880     if (!drv) {
3881         error_setg(errp, "Unknown file format '%s'", fmt);
3882         return;
3883     }
3884 
3885     proto_drv = bdrv_find_protocol(filename, true, errp);
3886     if (!proto_drv) {
3887         return;
3888     }
3889 
3890     if (!drv->create_opts) {
3891         error_setg(errp, "Format driver '%s' does not support image creation",
3892                    drv->format_name);
3893         return;
3894     }
3895 
3896     if (!proto_drv->create_opts) {
3897         error_setg(errp, "Protocol driver '%s' does not support image creation",
3898                    proto_drv->format_name);
3899         return;
3900     }
3901 
3902     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3903     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3904 
3905     /* Create parameter list with default values */
3906     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3907     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3908 
3909     /* Parse -o options */
3910     if (options) {
3911         qemu_opts_do_parse(opts, options, NULL, &local_err);
3912         if (local_err) {
3913             error_report_err(local_err);
3914             local_err = NULL;
3915             error_setg(errp, "Invalid options for file format '%s'", fmt);
3916             goto out;
3917         }
3918     }
3919 
3920     if (base_filename) {
3921         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3922         if (local_err) {
3923             error_setg(errp, "Backing file not supported for file format '%s'",
3924                        fmt);
3925             goto out;
3926         }
3927     }
3928 
3929     if (base_fmt) {
3930         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3931         if (local_err) {
3932             error_setg(errp, "Backing file format not supported for file "
3933                              "format '%s'", fmt);
3934             goto out;
3935         }
3936     }
3937 
3938     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3939     if (backing_file) {
3940         if (!strcmp(filename, backing_file)) {
3941             error_setg(errp, "Error: Trying to create an image with the "
3942                              "same filename as the backing file");
3943             goto out;
3944         }
3945     }
3946 
3947     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3948 
3949     // The size for the image must always be specified, with one exception:
3950     // If we are using a backing file, we can obtain the size from there
3951     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3952     if (size == -1) {
3953         if (backing_file) {
3954             BlockDriverState *bs;
3955             char *full_backing = g_new0(char, PATH_MAX);
3956             int64_t size;
3957             int back_flags;
3958             QDict *backing_options = NULL;
3959 
3960             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3961                                                          full_backing, PATH_MAX,
3962                                                          &local_err);
3963             if (local_err) {
3964                 g_free(full_backing);
3965                 goto out;
3966             }
3967 
3968             /* backing files always opened read-only */
3969             back_flags =
3970                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3971 
3972             if (backing_fmt) {
3973                 backing_options = qdict_new();
3974                 qdict_put(backing_options, "driver",
3975                           qstring_from_str(backing_fmt));
3976             }
3977 
3978             bs = NULL;
3979             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3980                             back_flags, &local_err);
3981             g_free(full_backing);
3982             if (ret < 0) {
3983                 goto out;
3984             }
3985             size = bdrv_getlength(bs);
3986             if (size < 0) {
3987                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3988                                  backing_file);
3989                 bdrv_unref(bs);
3990                 goto out;
3991             }
3992 
3993             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3994 
3995             bdrv_unref(bs);
3996         } else {
3997             error_setg(errp, "Image creation needs a size parameter");
3998             goto out;
3999         }
4000     }
4001 
4002     if (!quiet) {
4003         printf("Formatting '%s', fmt=%s ", filename, fmt);
4004         qemu_opts_print(opts, " ");
4005         puts("");
4006     }
4007 
4008     ret = bdrv_create(drv, filename, opts, &local_err);
4009 
4010     if (ret == -EFBIG) {
4011         /* This is generally a better message than whatever the driver would
4012          * deliver (especially because of the cluster_size_hint), since that
4013          * is most probably not much different from "image too large". */
4014         const char *cluster_size_hint = "";
4015         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
4016             cluster_size_hint = " (try using a larger cluster size)";
4017         }
4018         error_setg(errp, "The image size is too large for file format '%s'"
4019                    "%s", fmt, cluster_size_hint);
4020         error_free(local_err);
4021         local_err = NULL;
4022     }
4023 
4024 out:
4025     qemu_opts_del(opts);
4026     qemu_opts_free(create_opts);
4027     if (local_err) {
4028         error_propagate(errp, local_err);
4029     }
4030 }
4031 
4032 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4033 {
4034     return bs->aio_context;
4035 }
4036 
4037 void bdrv_detach_aio_context(BlockDriverState *bs)
4038 {
4039     BdrvAioNotifier *baf;
4040 
4041     if (!bs->drv) {
4042         return;
4043     }
4044 
4045     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
4046         baf->detach_aio_context(baf->opaque);
4047     }
4048 
4049     if (bs->throttle_state) {
4050         throttle_timers_detach_aio_context(&bs->throttle_timers);
4051     }
4052     if (bs->drv->bdrv_detach_aio_context) {
4053         bs->drv->bdrv_detach_aio_context(bs);
4054     }
4055     if (bs->file) {
4056         bdrv_detach_aio_context(bs->file->bs);
4057     }
4058     if (bs->backing) {
4059         bdrv_detach_aio_context(bs->backing->bs);
4060     }
4061 
4062     bs->aio_context = NULL;
4063 }
4064 
4065 void bdrv_attach_aio_context(BlockDriverState *bs,
4066                              AioContext *new_context)
4067 {
4068     BdrvAioNotifier *ban;
4069 
4070     if (!bs->drv) {
4071         return;
4072     }
4073 
4074     bs->aio_context = new_context;
4075 
4076     if (bs->backing) {
4077         bdrv_attach_aio_context(bs->backing->bs, new_context);
4078     }
4079     if (bs->file) {
4080         bdrv_attach_aio_context(bs->file->bs, new_context);
4081     }
4082     if (bs->drv->bdrv_attach_aio_context) {
4083         bs->drv->bdrv_attach_aio_context(bs, new_context);
4084     }
4085     if (bs->throttle_state) {
4086         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
4087     }
4088 
4089     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
4090         ban->attached_aio_context(new_context, ban->opaque);
4091     }
4092 }
4093 
4094 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
4095 {
4096     bdrv_drain(bs); /* ensure there are no in-flight requests */
4097 
4098     bdrv_detach_aio_context(bs);
4099 
4100     /* This function executes in the old AioContext so acquire the new one in
4101      * case it runs in a different thread.
4102      */
4103     aio_context_acquire(new_context);
4104     bdrv_attach_aio_context(bs, new_context);
4105     aio_context_release(new_context);
4106 }
4107 
4108 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4109         void (*attached_aio_context)(AioContext *new_context, void *opaque),
4110         void (*detach_aio_context)(void *opaque), void *opaque)
4111 {
4112     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4113     *ban = (BdrvAioNotifier){
4114         .attached_aio_context = attached_aio_context,
4115         .detach_aio_context   = detach_aio_context,
4116         .opaque               = opaque
4117     };
4118 
4119     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4120 }
4121 
4122 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4123                                       void (*attached_aio_context)(AioContext *,
4124                                                                    void *),
4125                                       void (*detach_aio_context)(void *),
4126                                       void *opaque)
4127 {
4128     BdrvAioNotifier *ban, *ban_next;
4129 
4130     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4131         if (ban->attached_aio_context == attached_aio_context &&
4132             ban->detach_aio_context   == detach_aio_context   &&
4133             ban->opaque               == opaque)
4134         {
4135             QLIST_REMOVE(ban, list);
4136             g_free(ban);
4137 
4138             return;
4139         }
4140     }
4141 
4142     abort();
4143 }
4144 
4145 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4146                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4147 {
4148     if (!bs->drv->bdrv_amend_options) {
4149         return -ENOTSUP;
4150     }
4151     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4152 }
4153 
4154 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4155  * of block filter and by bdrv_is_first_non_filter.
4156  * It is used to test if the given bs is the candidate or recurse more in the
4157  * node graph.
4158  */
4159 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4160                                       BlockDriverState *candidate)
4161 {
4162     /* return false if basic checks fails */
4163     if (!bs || !bs->drv) {
4164         return false;
4165     }
4166 
4167     /* the code reached a non block filter driver -> check if the bs is
4168      * the same as the candidate. It's the recursion termination condition.
4169      */
4170     if (!bs->drv->is_filter) {
4171         return bs == candidate;
4172     }
4173     /* Down this path the driver is a block filter driver */
4174 
4175     /* If the block filter recursion method is defined use it to recurse down
4176      * the node graph.
4177      */
4178     if (bs->drv->bdrv_recurse_is_first_non_filter) {
4179         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4180     }
4181 
4182     /* the driver is a block filter but don't allow to recurse -> return false
4183      */
4184     return false;
4185 }
4186 
4187 /* This function checks if the candidate is the first non filter bs down it's
4188  * bs chain. Since we don't have pointers to parents it explore all bs chains
4189  * from the top. Some filters can choose not to pass down the recursion.
4190  */
4191 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4192 {
4193     BlockDriverState *bs;
4194 
4195     /* walk down the bs forest recursively */
4196     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4197         bool perm;
4198 
4199         /* try to recurse in this top level bs */
4200         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4201 
4202         /* candidate is the first non filter */
4203         if (perm) {
4204             return true;
4205         }
4206     }
4207 
4208     return false;
4209 }
4210 
4211 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4212                                         const char *node_name, Error **errp)
4213 {
4214     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4215     AioContext *aio_context;
4216 
4217     if (!to_replace_bs) {
4218         error_setg(errp, "Node name '%s' not found", node_name);
4219         return NULL;
4220     }
4221 
4222     aio_context = bdrv_get_aio_context(to_replace_bs);
4223     aio_context_acquire(aio_context);
4224 
4225     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4226         to_replace_bs = NULL;
4227         goto out;
4228     }
4229 
4230     /* We don't want arbitrary node of the BDS chain to be replaced only the top
4231      * most non filter in order to prevent data corruption.
4232      * Another benefit is that this tests exclude backing files which are
4233      * blocked by the backing blockers.
4234      */
4235     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4236         error_setg(errp, "Only top most non filter can be replaced");
4237         to_replace_bs = NULL;
4238         goto out;
4239     }
4240 
4241 out:
4242     aio_context_release(aio_context);
4243     return to_replace_bs;
4244 }
4245 
4246 static bool append_open_options(QDict *d, BlockDriverState *bs)
4247 {
4248     const QDictEntry *entry;
4249     QemuOptDesc *desc;
4250     BdrvChild *child;
4251     bool found_any = false;
4252     const char *p;
4253 
4254     for (entry = qdict_first(bs->options); entry;
4255          entry = qdict_next(bs->options, entry))
4256     {
4257         /* Exclude options for children */
4258         QLIST_FOREACH(child, &bs->children, next) {
4259             if (strstart(qdict_entry_key(entry), child->name, &p)
4260                 && (!*p || *p == '.'))
4261             {
4262                 break;
4263             }
4264         }
4265         if (child) {
4266             continue;
4267         }
4268 
4269         /* And exclude all non-driver-specific options */
4270         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4271             if (!strcmp(qdict_entry_key(entry), desc->name)) {
4272                 break;
4273             }
4274         }
4275         if (desc->name) {
4276             continue;
4277         }
4278 
4279         qobject_incref(qdict_entry_value(entry));
4280         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4281         found_any = true;
4282     }
4283 
4284     return found_any;
4285 }
4286 
4287 /* Updates the following BDS fields:
4288  *  - exact_filename: A filename which may be used for opening a block device
4289  *                    which (mostly) equals the given BDS (even without any
4290  *                    other options; so reading and writing must return the same
4291  *                    results, but caching etc. may be different)
4292  *  - full_open_options: Options which, when given when opening a block device
4293  *                       (without a filename), result in a BDS (mostly)
4294  *                       equalling the given one
4295  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4296  *              full_open_options is converted to a JSON object, prefixed with
4297  *              "json:" (for use through the JSON pseudo protocol) and put here.
4298  */
4299 void bdrv_refresh_filename(BlockDriverState *bs)
4300 {
4301     BlockDriver *drv = bs->drv;
4302     QDict *opts;
4303 
4304     if (!drv) {
4305         return;
4306     }
4307 
4308     /* This BDS's file name will most probably depend on its file's name, so
4309      * refresh that first */
4310     if (bs->file) {
4311         bdrv_refresh_filename(bs->file->bs);
4312     }
4313 
4314     if (drv->bdrv_refresh_filename) {
4315         /* Obsolete information is of no use here, so drop the old file name
4316          * information before refreshing it */
4317         bs->exact_filename[0] = '\0';
4318         if (bs->full_open_options) {
4319             QDECREF(bs->full_open_options);
4320             bs->full_open_options = NULL;
4321         }
4322 
4323         opts = qdict_new();
4324         append_open_options(opts, bs);
4325         drv->bdrv_refresh_filename(bs, opts);
4326         QDECREF(opts);
4327     } else if (bs->file) {
4328         /* Try to reconstruct valid information from the underlying file */
4329         bool has_open_options;
4330 
4331         bs->exact_filename[0] = '\0';
4332         if (bs->full_open_options) {
4333             QDECREF(bs->full_open_options);
4334             bs->full_open_options = NULL;
4335         }
4336 
4337         opts = qdict_new();
4338         has_open_options = append_open_options(opts, bs);
4339 
4340         /* If no specific options have been given for this BDS, the filename of
4341          * the underlying file should suffice for this one as well */
4342         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4343             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4344         }
4345         /* Reconstructing the full options QDict is simple for most format block
4346          * drivers, as long as the full options are known for the underlying
4347          * file BDS. The full options QDict of that file BDS should somehow
4348          * contain a representation of the filename, therefore the following
4349          * suffices without querying the (exact_)filename of this BDS. */
4350         if (bs->file->bs->full_open_options) {
4351             qdict_put_obj(opts, "driver",
4352                           QOBJECT(qstring_from_str(drv->format_name)));
4353             QINCREF(bs->file->bs->full_open_options);
4354             qdict_put_obj(opts, "file",
4355                           QOBJECT(bs->file->bs->full_open_options));
4356 
4357             bs->full_open_options = opts;
4358         } else {
4359             QDECREF(opts);
4360         }
4361     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4362         /* There is no underlying file BDS (at least referenced by BDS.file),
4363          * so the full options QDict should be equal to the options given
4364          * specifically for this block device when it was opened (plus the
4365          * driver specification).
4366          * Because those options don't change, there is no need to update
4367          * full_open_options when it's already set. */
4368 
4369         opts = qdict_new();
4370         append_open_options(opts, bs);
4371         qdict_put_obj(opts, "driver",
4372                       QOBJECT(qstring_from_str(drv->format_name)));
4373 
4374         if (bs->exact_filename[0]) {
4375             /* This may not work for all block protocol drivers (some may
4376              * require this filename to be parsed), but we have to find some
4377              * default solution here, so just include it. If some block driver
4378              * does not support pure options without any filename at all or
4379              * needs some special format of the options QDict, it needs to
4380              * implement the driver-specific bdrv_refresh_filename() function.
4381              */
4382             qdict_put_obj(opts, "filename",
4383                           QOBJECT(qstring_from_str(bs->exact_filename)));
4384         }
4385 
4386         bs->full_open_options = opts;
4387     }
4388 
4389     if (bs->exact_filename[0]) {
4390         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4391     } else if (bs->full_open_options) {
4392         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4393         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4394                  qstring_get_str(json));
4395         QDECREF(json);
4396     }
4397 }
4398