xref: /qemu/block.c (revision 6e8e5cb9)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/qjson.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/notify.h"
37 #include "qemu/coroutine.h"
38 #include "block/qapi.h"
39 #include "qmp-commands.h"
40 #include "qemu/timer.h"
41 #include "qapi-event.h"
42 #include "block/throttle-groups.h"
43 
44 #ifdef CONFIG_BSD
45 #include <sys/ioctl.h>
46 #include <sys/queue.h>
47 #ifndef __DragonFly__
48 #include <sys/disk.h>
49 #endif
50 #endif
51 
52 #ifdef _WIN32
53 #include <windows.h>
54 #endif
55 
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably for bs->dirty_bitmaps -- TODO confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
72 
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* List to which bdrv_new_root() appends the nodes it creates */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* List to which bdrv_assign_node_name() appends nodes once they are named */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* List to which bdrv_new() appends every BlockDriverState it allocates */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Registered block drivers; bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations of static functions used before their definition */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

static void bdrv_close(BlockDriverState *bs);
98 
99 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * ':' (for example "c:"), 0 otherwise.  The second character is only
 * examined when the first one is a letter.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    const int is_lower = drive >= 'a' && drive <= 'z';
    const int is_upper = drive >= 'A' && drive <= 'Z';

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
106 
107 int is_windows_drive(const char *filename)
108 {
109     if (is_windows_drive_prefix(filename) &&
110         filename[2] == '\0')
111         return 1;
112     if (strstart(filename, "\\\\.\\", NULL) ||
113         strstart(filename, "//./", NULL))
114         return 1;
115     return 0;
116 }
117 #endif
118 
119 size_t bdrv_opt_mem_align(BlockDriverState *bs)
120 {
121     if (!bs || !bs->drv) {
122         /* page size or 4k (hdd sector size) should be on the safe side */
123         return MAX(4096, getpagesize());
124     }
125 
126     return bs->bl.opt_mem_alignment;
127 }
128 
129 size_t bdrv_min_mem_align(BlockDriverState *bs)
130 {
131     if (!bs || !bs->drv) {
132         /* page size or 4k (hdd sector size) should be on the safe side */
133         return MAX(4096, getpagesize());
134     }
135 
136     return bs->bl.min_mem_alignment;
137 }
138 
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' appears
 * before any path separator.  On Windows, drive specifications are never
 * treated as protocols and '\' counts as a separator as well.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_chars = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_chars = ":/";
#endif

    /* Look at the first character that is either ':' or a separator */
    return path[strcspn(path, stop_chars)] == ':';
}
156 
/*
 * Return 1 if @path is absolute, 0 otherwise.  A path is absolute when it
 * starts with '/'; on Windows a leading '\' or any drive specification
 * also counts.
 */
int path_is_absolute(const char *path)
{
    const char first = *path;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
169 
/*
 * Build in @dest (a buffer of @dest_size bytes) the path of @filename as
 * seen from @base_path.
 *
 * If @filename is absolute it is copied to @dest as is.  Otherwise the
 * leading part of @base_path -- everything up to and including its last
 * path separator, or its "<protocol>:" prefix if that reaches further -- is
 * copied and @filename is appended to it, so URLs are supported.
 *
 * The copied leading part is clamped to @dest_size - 1 bytes and
 * NUL-terminated; pstrcpy()/pstrcat() are passed @dest_size as their bound.
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *dir_end;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /*
     * Only skip over a ':' that really terminates a protocol prefix.  A
     * colon that merely occurs inside the last path component ("dir/a:b")
     * must not drag part of that component into the result.
     * path_has_protocol() guarantees that a ':' exists when it returns true.
     */
    prefix_end = base_path;
    if (path_has_protocol(base_path)) {
        prefix_end = strchr(base_path, ':') + 1;
    }
#ifdef _WIN32
    if (is_windows_drive_prefix(base_path)) {
        /* keep the "X:" of a drive-relative base such as "c:image" */
        prefix_end = base_path + 2;
    }
#endif

    /* Find the end of the directory part: just past the last separator */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!dir_end || backslash > dir_end) {
            dir_end = backslash;
        }
    }
#endif
    if (dir_end) {
        dir_end++;
    } else {
        dir_end = base_path;
    }

    /* Keep whichever of the two candidates extends further */
    if (prefix_end > dir_end) {
        dir_end = prefix_end;
    }

    len = dir_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
213 
214 void bdrv_get_full_backing_filename_from_filename(const char *backed,
215                                                   const char *backing,
216                                                   char *dest, size_t sz,
217                                                   Error **errp)
218 {
219     if (backing[0] == '\0' || path_has_protocol(backing) ||
220         path_is_absolute(backing))
221     {
222         pstrcpy(dest, sz, backing);
223     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
224         error_setg(errp, "Cannot use relative backing file names for '%s'",
225                    backed);
226     } else {
227         path_combine(dest, sz, backed, backing);
228     }
229 }
230 
231 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
232                                     Error **errp)
233 {
234     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
235 
236     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
237                                                  dest, sz, errp);
238 }
239 
/*
 * Register the block driver @bdrv: pass it to bdrv_setup_io_funcs() and
 * insert it at the head of the bdrv_drivers list.
 */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv_setup_io_funcs(bdrv);

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
246 
/*
 * Allocate a BlockDriverState with bdrv_new() and additionally append it to
 * the bdrv_states list.  Returns the new node.
 */
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}
254 
255 BlockDriverState *bdrv_new(void)
256 {
257     BlockDriverState *bs;
258     int i;
259 
260     bs = g_new0(BlockDriverState, 1);
261     QLIST_INIT(&bs->dirty_bitmaps);
262     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
263         QLIST_INIT(&bs->op_blockers[i]);
264     }
265     notifier_with_return_list_init(&bs->before_write_notifiers);
266     qemu_co_queue_init(&bs->throttled_reqs[0]);
267     qemu_co_queue_init(&bs->throttled_reqs[1]);
268     bs->refcnt = 1;
269     bs->aio_context = qemu_get_aio_context();
270 
271     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
272 
273     return bs;
274 }
275 
276 BlockDriver *bdrv_find_format(const char *format_name)
277 {
278     BlockDriver *drv1;
279     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280         if (!strcmp(drv1->format_name, format_name)) {
281             return drv1;
282         }
283     }
284     return NULL;
285 }
286 
287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
288 {
289     static const char *whitelist_rw[] = {
290         CONFIG_BDRV_RW_WHITELIST
291     };
292     static const char *whitelist_ro[] = {
293         CONFIG_BDRV_RO_WHITELIST
294     };
295     const char **p;
296 
297     if (!whitelist_rw[0] && !whitelist_ro[0]) {
298         return 1;               /* no whitelist, anything goes */
299     }
300 
301     for (p = whitelist_rw; *p; p++) {
302         if (!strcmp(drv->format_name, *p)) {
303             return 1;
304         }
305     }
306     if (read_only) {
307         for (p = whitelist_ro; *p; p++) {
308             if (!strcmp(drv->format_name, *p)) {
309                 return 1;
310             }
311         }
312     }
313     return 0;
314 }
315 
/*
 * Arguments and results exchanged between bdrv_create() and
 * bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* Driver whose bdrv_create callback is invoked */
    char *filename;     /* Passed to the bdrv_create callback */
    QemuOpts *opts;     /* Passed to the bdrv_create callback */
    int ret;            /* Callback result; NOT_DONE until it has returned */
    Error *err;         /* Error reported by the callback, if any */
} CreateCo;
323 
324 static void coroutine_fn bdrv_create_co_entry(void *opaque)
325 {
326     Error *local_err = NULL;
327     int ret;
328 
329     CreateCo *cco = opaque;
330     assert(cco->drv);
331 
332     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
333     if (local_err) {
334         error_propagate(&cco->err, local_err);
335     }
336     cco->ret = ret;
337 }
338 
339 int bdrv_create(BlockDriver *drv, const char* filename,
340                 QemuOpts *opts, Error **errp)
341 {
342     int ret;
343 
344     Coroutine *co;
345     CreateCo cco = {
346         .drv = drv,
347         .filename = g_strdup(filename),
348         .opts = opts,
349         .ret = NOT_DONE,
350         .err = NULL,
351     };
352 
353     if (!drv->bdrv_create) {
354         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
355         ret = -ENOTSUP;
356         goto out;
357     }
358 
359     if (qemu_in_coroutine()) {
360         /* Fast-path if already in coroutine context */
361         bdrv_create_co_entry(&cco);
362     } else {
363         co = qemu_coroutine_create(bdrv_create_co_entry);
364         qemu_coroutine_enter(co, &cco);
365         while (cco.ret == NOT_DONE) {
366             aio_poll(qemu_get_aio_context(), true);
367         }
368     }
369 
370     ret = cco.ret;
371     if (ret < 0) {
372         if (cco.err) {
373             error_propagate(errp, cco.err);
374         } else {
375             error_setg_errno(errp, -ret, "Could not create image");
376         }
377     }
378 
379 out:
380     g_free(cco.filename);
381     return ret;
382 }
383 
384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
385 {
386     BlockDriver *drv;
387     Error *local_err = NULL;
388     int ret;
389 
390     drv = bdrv_find_protocol(filename, true, errp);
391     if (drv == NULL) {
392         return -ENOENT;
393     }
394 
395     ret = bdrv_create(drv, filename, opts, &local_err);
396     if (local_err) {
397         error_propagate(errp, local_err);
398     }
399     return ret;
400 }
401 
402 /**
403  * Try to get @bs's logical and physical block size.
404  * On success, store them in @bsz struct and return 0.
405  * On failure return -errno.
406  * @bs must not be empty.
407  */
408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
409 {
410     BlockDriver *drv = bs->drv;
411 
412     if (drv && drv->bdrv_probe_blocksizes) {
413         return drv->bdrv_probe_blocksizes(bs, bsz);
414     }
415 
416     return -ENOTSUP;
417 }
418 
419 /**
420  * Try to get @bs's geometry (cyls, heads, sectors).
421  * On success, store them in @geo struct and return 0.
422  * On failure return -errno.
423  * @bs must not be empty.
424  */
425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
426 {
427     BlockDriver *drv = bs->drv;
428 
429     if (drv && drv->bdrv_probe_geometry) {
430         return drv->bdrv_probe_geometry(bs, geo);
431     }
432 
433     return -ENOTSUP;
434 }
435 
/*
 * Create a uniquely-named empty temporary file and store its name in
 * @filename, a buffer of @size bytes.
 *
 * On Windows the file is created with GetTempFileName() in the directory
 * reported by GetTempPath(); @size must be at least MAX_PATH.  Elsewhere it
 * is created with mkstemp() in $TMPDIR, or in /var/tmp when TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code).  -EOVERFLOW is returned when the file name
 * template does not fit into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * overwrite errno and the caller must see the close() error.
         */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
471 
472 /*
473  * Detect host devices. By convention, /dev/cdrom[N] is always
474  * recognized as a host CDROM.
475  */
476 static BlockDriver *find_hdev_driver(const char *filename)
477 {
478     int score_max = 0, score;
479     BlockDriver *drv = NULL, *d;
480 
481     QLIST_FOREACH(d, &bdrv_drivers, list) {
482         if (d->bdrv_probe_device) {
483             score = d->bdrv_probe_device(filename);
484             if (score > score_max) {
485                 score_max = score;
486                 drv = d;
487             }
488         }
489     }
490 
491     return drv;
492 }
493 
494 BlockDriver *bdrv_find_protocol(const char *filename,
495                                 bool allow_protocol_prefix,
496                                 Error **errp)
497 {
498     BlockDriver *drv1;
499     char protocol[128];
500     int len;
501     const char *p;
502 
503     /* TODO Drivers without bdrv_file_open must be specified explicitly */
504 
505     /*
506      * XXX(hch): we really should not let host device detection
507      * override an explicit protocol specification, but moving this
508      * later breaks access to device names with colons in them.
509      * Thanks to the brain-dead persistent naming schemes on udev-
510      * based Linux systems those actually are quite common.
511      */
512     drv1 = find_hdev_driver(filename);
513     if (drv1) {
514         return drv1;
515     }
516 
517     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
518         return &bdrv_file;
519     }
520 
521     p = strchr(filename, ':');
522     assert(p != NULL);
523     len = p - filename;
524     if (len > sizeof(protocol) - 1)
525         len = sizeof(protocol) - 1;
526     memcpy(protocol, filename, len);
527     protocol[len] = '\0';
528     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529         if (drv1->protocol_name &&
530             !strcmp(drv1->protocol_name, protocol)) {
531             return drv1;
532         }
533     }
534 
535     error_setg(errp, "Unknown protocol '%s'", protocol);
536     return NULL;
537 }
538 
539 /*
540  * Guess image format by probing its contents.
541  * This is not a good idea when your image is raw (CVE-2008-2004), but
542  * we do it anyway for backward compatibility.
543  *
544  * @buf         contains the image's first @buf_size bytes.
545  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546  *              but can be smaller if the image file is smaller)
547  * @filename    is its filename.
548  *
549  * For all block drivers, call the bdrv_probe() method to get its
550  * probing score.
551  * Return the first block driver with the highest probing score.
552  */
553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554                             const char *filename)
555 {
556     int score_max = 0, score;
557     BlockDriver *drv = NULL, *d;
558 
559     QLIST_FOREACH(d, &bdrv_drivers, list) {
560         if (d->bdrv_probe) {
561             score = d->bdrv_probe(buf, buf_size, filename);
562             if (score > score_max) {
563                 score_max = score;
564                 drv = d;
565             }
566         }
567     }
568 
569     return drv;
570 }
571 
572 static int find_image_format(BlockDriverState *bs, const char *filename,
573                              BlockDriver **pdrv, Error **errp)
574 {
575     BlockDriver *drv;
576     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
577     int ret = 0;
578 
579     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580     if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
581         *pdrv = &bdrv_raw;
582         return ret;
583     }
584 
585     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
586     if (ret < 0) {
587         error_setg_errno(errp, -ret, "Could not read image for determining its "
588                          "format");
589         *pdrv = NULL;
590         return ret;
591     }
592 
593     drv = bdrv_probe_all(buf, ret, filename);
594     if (!drv) {
595         error_setg(errp, "Could not determine image format: No compatible "
596                    "driver found");
597         ret = -ENOENT;
598     }
599     *pdrv = drv;
600     return ret;
601 }
602 
603 /**
604  * Set the current 'total_sectors' value
605  * Return 0 on success, -errno on error.
606  */
607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
608 {
609     BlockDriver *drv = bs->drv;
610 
611     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
612     if (bdrv_is_sg(bs))
613         return 0;
614 
615     /* query actual device if possible, otherwise just trust the hint */
616     if (drv->bdrv_getlength) {
617         int64_t length = drv->bdrv_getlength(bs);
618         if (length < 0) {
619             return length;
620         }
621         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
622     }
623 
624     bs->total_sectors = hint;
625     return 0;
626 }
627 
628 /**
629  * Combines a QDict of new block driver @options with any missing options taken
630  * from @old_options, so that leaving out an option defaults to its old value.
631  */
632 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
633                               QDict *old_options)
634 {
635     if (bs->drv && bs->drv->bdrv_join_options) {
636         bs->drv->bdrv_join_options(options, old_options);
637     } else {
638         qdict_join(options, old_options, false);
639     }
640 }
641 
642 /**
643  * Set open flags for a given discard mode
644  *
645  * Return 0 on success, -1 if the discard mode was invalid.
646  */
647 int bdrv_parse_discard_flags(const char *mode, int *flags)
648 {
649     *flags &= ~BDRV_O_UNMAP;
650 
651     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
652         /* do nothing */
653     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
654         *flags |= BDRV_O_UNMAP;
655     } else {
656         return -1;
657     }
658 
659     return 0;
660 }
661 
662 /**
663  * Set open flags for a given cache mode
664  *
665  * Return 0 on success, -1 if the cache mode was invalid.
666  */
667 int bdrv_parse_cache_flags(const char *mode, int *flags)
668 {
669     *flags &= ~BDRV_O_CACHE_MASK;
670 
671     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
672         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
673     } else if (!strcmp(mode, "directsync")) {
674         *flags |= BDRV_O_NOCACHE;
675     } else if (!strcmp(mode, "writeback")) {
676         *flags |= BDRV_O_CACHE_WB;
677     } else if (!strcmp(mode, "unsafe")) {
678         *flags |= BDRV_O_CACHE_WB;
679         *flags |= BDRV_O_NO_FLUSH;
680     } else if (!strcmp(mode, "writethrough")) {
681         /* this is the default */
682     } else {
683         return -1;
684     }
685 
686     return 0;
687 }
688 
689 /*
690  * Returns the flags that a temporary snapshot should get, based on the
691  * originally requested flags (the originally requested image will have flags
692  * like a backing file)
693  */
694 static int bdrv_temp_snapshot_flags(int flags)
695 {
696     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
697 }
698 
699 /*
700  * Returns the options and flags that bs->file should get if a protocol driver
701  * is expected, based on the given options and flags for the parent BDS
702  */
703 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
704                                    int parent_flags, QDict *parent_options)
705 {
706     int flags = parent_flags;
707 
708     /* Enable protocol handling, disable format probing for bs->file */
709     flags |= BDRV_O_PROTOCOL;
710 
711     /* If the cache mode isn't explicitly set, inherit direct and no-flush from
712      * the parent. */
713     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
714     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
715 
716     /* Our block drivers take care to send flushes and respect unmap policy,
717      * so we can default to enable both on lower layers regardless of the
718      * corresponding parent options. */
719     qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
720     flags |= BDRV_O_UNMAP;
721 
722     /* Clear flags that only apply to the top layer */
723     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
724 
725     *child_flags = flags;
726 }
727 
/* Child role whose options are derived with bdrv_inherited_options() */
const BdrvChildRole child_file = {
    .inherit_options = bdrv_inherited_options,
};
731 
732 /*
733  * Returns the options and flags that bs->file should get if the use of formats
734  * (and not only protocols) is permitted for it, based on the given options and
735  * flags for the parent BDS
736  */
737 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
738                                        int parent_flags, QDict *parent_options)
739 {
740     child_file.inherit_options(child_flags, child_options,
741                                parent_flags, parent_options);
742 
743     *child_flags &= ~BDRV_O_PROTOCOL;
744 }
745 
/* Child role whose options are derived with bdrv_inherited_fmt_options() */
const BdrvChildRole child_format = {
    .inherit_options = bdrv_inherited_fmt_options,
};
749 
750 /*
751  * Returns the options and flags that bs->backing should get, based on the
752  * given options and flags for the parent BDS
753  */
754 static void bdrv_backing_options(int *child_flags, QDict *child_options,
755                                  int parent_flags, QDict *parent_options)
756 {
757     int flags = parent_flags;
758 
759     /* The cache mode is inherited unmodified for backing files */
760     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
761     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
762     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
763 
764     /* backing files always opened read-only */
765     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
766 
767     /* snapshot=on is handled on the top layer */
768     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
769 
770     *child_flags = flags;
771 }
772 
/* Child role whose options are derived with bdrv_backing_options() */
static const BdrvChildRole child_backing = {
    .inherit_options = bdrv_backing_options,
};
776 
777 static int bdrv_open_flags(BlockDriverState *bs, int flags)
778 {
779     int open_flags = flags | BDRV_O_CACHE_WB;
780 
781     /*
782      * Clear flags that are internal to the block layer before opening the
783      * image.
784      */
785     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
786 
787     /*
788      * Snapshots should be writable.
789      */
790     if (flags & BDRV_O_TEMPORARY) {
791         open_flags |= BDRV_O_RDWR;
792     }
793 
794     return open_flags;
795 }
796 
797 static void update_flags_from_options(int *flags, QemuOpts *opts)
798 {
799     *flags &= ~BDRV_O_CACHE_MASK;
800 
801     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
802     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
803         *flags |= BDRV_O_CACHE_WB;
804     }
805 
806     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
807     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
808         *flags |= BDRV_O_NO_FLUSH;
809     }
810 
811     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
812     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
813         *flags |= BDRV_O_NOCACHE;
814     }
815 }
816 
817 static void update_options_from_flags(QDict *options, int flags)
818 {
819     if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
820         qdict_put(options, BDRV_OPT_CACHE_WB,
821                   qbool_from_bool(flags & BDRV_O_CACHE_WB));
822     }
823     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
824         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
825                   qbool_from_bool(flags & BDRV_O_NOCACHE));
826     }
827     if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
828         qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
829                   qbool_from_bool(flags & BDRV_O_NO_FLUSH));
830     }
831 }
832 
833 static void bdrv_assign_node_name(BlockDriverState *bs,
834                                   const char *node_name,
835                                   Error **errp)
836 {
837     char *gen_node_name = NULL;
838 
839     if (!node_name) {
840         node_name = gen_node_name = id_generate(ID_BLOCK);
841     } else if (!id_wellformed(node_name)) {
842         /*
843          * Check for empty string or invalid characters, but not if it is
844          * generated (generated names use characters not available to the user)
845          */
846         error_setg(errp, "Invalid node name");
847         return;
848     }
849 
850     /* takes care of avoiding namespaces collisions */
851     if (blk_by_name(node_name)) {
852         error_setg(errp, "node-name=%s is conflicting with a device id",
853                    node_name);
854         goto out;
855     }
856 
857     /* takes care of avoiding duplicates node names */
858     if (bdrv_find_node(node_name)) {
859         error_setg(errp, "Duplicate node name");
860         goto out;
861     }
862 
863     /* copy node name into the bs and insert it into the graph list */
864     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
865     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
866 out:
867     g_free(gen_node_name);
868 }
869 
/*
 * Options handled by the generic block layer itself; bdrv_open_common()
 * absorbs them from the options QDict of the node being opened.
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_WB,
            .type = QEMU_OPT_BOOL,
            .help = "Enable writeback mode",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        { /* end of list */ }
    },
};
902 
903 /*
904  * Common part for opening disk images and files
905  *
906  * Removes all processed options from *options.
907  */
908 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
909                             QDict *options, Error **errp)
910 {
911     int ret, open_flags;
912     const char *filename;
913     const char *driver_name = NULL;
914     const char *node_name = NULL;
915     QemuOpts *opts;
916     BlockDriver *drv;
917     Error *local_err = NULL;
918 
919     assert(bs->file == NULL);
920     assert(options != NULL && bs->options != options);
921 
922     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
923     qemu_opts_absorb_qdict(opts, options, &local_err);
924     if (local_err) {
925         error_propagate(errp, local_err);
926         ret = -EINVAL;
927         goto fail_opts;
928     }
929 
930     driver_name = qemu_opt_get(opts, "driver");
931     drv = bdrv_find_format(driver_name);
932     assert(drv != NULL);
933 
934     if (file != NULL) {
935         filename = file->bs->filename;
936     } else {
937         filename = qdict_get_try_str(options, "filename");
938     }
939 
940     if (drv->bdrv_needs_filename && !filename) {
941         error_setg(errp, "The '%s' block driver requires a file name",
942                    drv->format_name);
943         ret = -EINVAL;
944         goto fail_opts;
945     }
946 
947     trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
948                            drv->format_name);
949 
950     node_name = qemu_opt_get(opts, "node-name");
951     bdrv_assign_node_name(bs, node_name, &local_err);
952     if (local_err) {
953         error_propagate(errp, local_err);
954         ret = -EINVAL;
955         goto fail_opts;
956     }
957 
958     bs->request_alignment = 512;
959     bs->zero_beyond_eof = true;
960     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
961 
962     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
963         error_setg(errp,
964                    !bs->read_only && bdrv_is_whitelisted(drv, true)
965                         ? "Driver '%s' can only be used for read-only devices"
966                         : "Driver '%s' is not whitelisted",
967                    drv->format_name);
968         ret = -ENOTSUP;
969         goto fail_opts;
970     }
971 
972     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
973     if (bs->open_flags & BDRV_O_COPY_ON_READ) {
974         if (!bs->read_only) {
975             bdrv_enable_copy_on_read(bs);
976         } else {
977             error_setg(errp, "Can't use copy-on-read on read-only device");
978             ret = -EINVAL;
979             goto fail_opts;
980         }
981     }
982 
983     if (filename != NULL) {
984         pstrcpy(bs->filename, sizeof(bs->filename), filename);
985     } else {
986         bs->filename[0] = '\0';
987     }
988     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
989 
990     bs->drv = drv;
991     bs->opaque = g_malloc0(drv->instance_size);
992 
993     /* Apply cache mode options */
994     update_flags_from_options(&bs->open_flags, opts);
995     bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
996 
997     /* Open the image, either directly or using a protocol */
998     open_flags = bdrv_open_flags(bs, bs->open_flags);
999     if (drv->bdrv_file_open) {
1000         assert(file == NULL);
1001         assert(!drv->bdrv_needs_filename || filename != NULL);
1002         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1003     } else {
1004         if (file == NULL) {
1005             error_setg(errp, "Can't use '%s' as a block driver for the "
1006                        "protocol level", drv->format_name);
1007             ret = -EINVAL;
1008             goto free_and_fail;
1009         }
1010         bs->file = file;
1011         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1012     }
1013 
1014     if (ret < 0) {
1015         if (local_err) {
1016             error_propagate(errp, local_err);
1017         } else if (bs->filename[0]) {
1018             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1019         } else {
1020             error_setg_errno(errp, -ret, "Could not open image");
1021         }
1022         goto free_and_fail;
1023     }
1024 
1025     if (bs->encrypted) {
1026         error_report("Encrypted images are deprecated");
1027         error_printf("Support for them will be removed in a future release.\n"
1028                      "You can use 'qemu-img convert' to convert your image"
1029                      " to an unencrypted one.\n");
1030     }
1031 
1032     ret = refresh_total_sectors(bs, bs->total_sectors);
1033     if (ret < 0) {
1034         error_setg_errno(errp, -ret, "Could not refresh total sector count");
1035         goto free_and_fail;
1036     }
1037 
1038     bdrv_refresh_limits(bs, &local_err);
1039     if (local_err) {
1040         error_propagate(errp, local_err);
1041         ret = -EINVAL;
1042         goto free_and_fail;
1043     }
1044 
1045     assert(bdrv_opt_mem_align(bs) != 0);
1046     assert(bdrv_min_mem_align(bs) != 0);
1047     assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1048 
1049     qemu_opts_del(opts);
1050     return 0;
1051 
1052 free_and_fail:
1053     bs->file = NULL;
1054     g_free(bs->opaque);
1055     bs->opaque = NULL;
1056     bs->drv = NULL;
1057 fail_opts:
1058     qemu_opts_del(opts);
1059     return ret;
1060 }
1061 
1062 static QDict *parse_json_filename(const char *filename, Error **errp)
1063 {
1064     QObject *options_obj;
1065     QDict *options;
1066     int ret;
1067 
1068     ret = strstart(filename, "json:", &filename);
1069     assert(ret);
1070 
1071     options_obj = qobject_from_json(filename);
1072     if (!options_obj) {
1073         error_setg(errp, "Could not parse the JSON options");
1074         return NULL;
1075     }
1076 
1077     if (qobject_type(options_obj) != QTYPE_QDICT) {
1078         qobject_decref(options_obj);
1079         error_setg(errp, "Invalid JSON object given");
1080         return NULL;
1081     }
1082 
1083     options = qobject_to_qdict(options_obj);
1084     qdict_flatten(options);
1085 
1086     return options;
1087 }
1088 
1089 static void parse_json_protocol(QDict *options, const char **pfilename,
1090                                 Error **errp)
1091 {
1092     QDict *json_options;
1093     Error *local_err = NULL;
1094 
1095     /* Parse json: pseudo-protocol */
1096     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1097         return;
1098     }
1099 
1100     json_options = parse_json_filename(*pfilename, &local_err);
1101     if (local_err) {
1102         error_propagate(errp, local_err);
1103         return;
1104     }
1105 
1106     /* Options given in the filename have lower priority than options
1107      * specified directly */
1108     qdict_join(options, json_options, false);
1109     QDECREF(json_options);
1110     *pfilename = NULL;
1111 }
1112 
1113 /*
1114  * Fills in default options for opening images and converts the legacy
1115  * filename/flags pair to option QDict entries.
1116  * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1117  * block driver has been specified explicitly.
1118  */
1119 static int bdrv_fill_options(QDict **options, const char *filename,
1120                              int *flags, Error **errp)
1121 {
1122     const char *drvname;
1123     bool protocol = *flags & BDRV_O_PROTOCOL;
1124     bool parse_filename = false;
1125     BlockDriver *drv = NULL;
1126     Error *local_err = NULL;
1127 
1128     drvname = qdict_get_try_str(*options, "driver");
1129     if (drvname) {
1130         drv = bdrv_find_format(drvname);
1131         if (!drv) {
1132             error_setg(errp, "Unknown driver '%s'", drvname);
1133             return -ENOENT;
1134         }
1135         /* If the user has explicitly specified the driver, this choice should
1136          * override the BDRV_O_PROTOCOL flag */
1137         protocol = drv->bdrv_file_open;
1138     }
1139 
1140     if (protocol) {
1141         *flags |= BDRV_O_PROTOCOL;
1142     } else {
1143         *flags &= ~BDRV_O_PROTOCOL;
1144     }
1145 
1146     /* Translate cache options from flags into options */
1147     update_options_from_flags(*options, *flags);
1148 
1149     /* Fetch the file name from the options QDict if necessary */
1150     if (protocol && filename) {
1151         if (!qdict_haskey(*options, "filename")) {
1152             qdict_put(*options, "filename", qstring_from_str(filename));
1153             parse_filename = true;
1154         } else {
1155             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1156                              "the same time");
1157             return -EINVAL;
1158         }
1159     }
1160 
1161     /* Find the right block driver */
1162     filename = qdict_get_try_str(*options, "filename");
1163 
1164     if (!drvname && protocol) {
1165         if (filename) {
1166             drv = bdrv_find_protocol(filename, parse_filename, errp);
1167             if (!drv) {
1168                 return -EINVAL;
1169             }
1170 
1171             drvname = drv->format_name;
1172             qdict_put(*options, "driver", qstring_from_str(drvname));
1173         } else {
1174             error_setg(errp, "Must specify either driver or file");
1175             return -EINVAL;
1176         }
1177     }
1178 
1179     assert(drv || !protocol);
1180 
1181     /* Driver-specific filename parsing */
1182     if (drv && drv->bdrv_parse_filename && parse_filename) {
1183         drv->bdrv_parse_filename(filename, *options, &local_err);
1184         if (local_err) {
1185             error_propagate(errp, local_err);
1186             return -EINVAL;
1187         }
1188 
1189         if (!drv->bdrv_needs_filename) {
1190             qdict_del(*options, "filename");
1191         }
1192     }
1193 
1194     if (runstate_check(RUN_STATE_INMIGRATE)) {
1195         *flags |= BDRV_O_INACTIVE;
1196     }
1197 
1198     return 0;
1199 }
1200 
1201 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1202                                     BlockDriverState *child_bs,
1203                                     const char *child_name,
1204                                     const BdrvChildRole *child_role)
1205 {
1206     BdrvChild *child = g_new(BdrvChild, 1);
1207     *child = (BdrvChild) {
1208         .bs     = child_bs,
1209         .name   = g_strdup(child_name),
1210         .role   = child_role,
1211     };
1212 
1213     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1214     QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1215 
1216     return child;
1217 }
1218 
1219 static void bdrv_detach_child(BdrvChild *child)
1220 {
1221     QLIST_REMOVE(child, next);
1222     QLIST_REMOVE(child, next_parent);
1223     g_free(child->name);
1224     g_free(child);
1225 }
1226 
1227 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1228 {
1229     BlockDriverState *child_bs;
1230 
1231     if (child == NULL) {
1232         return;
1233     }
1234 
1235     if (child->bs->inherits_from == parent) {
1236         child->bs->inherits_from = NULL;
1237     }
1238 
1239     child_bs = child->bs;
1240     bdrv_detach_child(child);
1241     bdrv_unref(child_bs);
1242 }
1243 
1244 /*
1245  * Sets the backing file link of a BDS. A new reference is created; callers
1246  * which don't need their own reference any more must call bdrv_unref().
1247  */
1248 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1249 {
1250     if (backing_hd) {
1251         bdrv_ref(backing_hd);
1252     }
1253 
1254     if (bs->backing) {
1255         assert(bs->backing_blocker);
1256         bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1257         bdrv_unref_child(bs, bs->backing);
1258     } else if (backing_hd) {
1259         error_setg(&bs->backing_blocker,
1260                    "node is used as backing hd of '%s'",
1261                    bdrv_get_device_or_node_name(bs));
1262     }
1263 
1264     if (!backing_hd) {
1265         error_free(bs->backing_blocker);
1266         bs->backing_blocker = NULL;
1267         bs->backing = NULL;
1268         goto out;
1269     }
1270     bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1271     bs->open_flags &= ~BDRV_O_NO_BACKING;
1272     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1273     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1274             backing_hd->drv ? backing_hd->drv->format_name : "");
1275 
1276     bdrv_op_block_all(backing_hd, bs->backing_blocker);
1277     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1278     bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1279                     bs->backing_blocker);
1280 out:
1281     bdrv_refresh_limits(bs, NULL);
1282 }
1283 
1284 /*
1285  * Opens the backing file for a BlockDriverState if not yet open
1286  *
1287  * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1288  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1289  * itself, all options starting with "${bdref_key}." are considered part of the
1290  * BlockdevRef.
1291  *
1292  * TODO Can this be unified with bdrv_open_image()?
1293  */
1294 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1295                            const char *bdref_key, Error **errp)
1296 {
1297     char *backing_filename = g_malloc0(PATH_MAX);
1298     char *bdref_key_dot;
1299     const char *reference = NULL;
1300     int ret = 0;
1301     BlockDriverState *backing_hd;
1302     QDict *options;
1303     QDict *tmp_parent_options = NULL;
1304     Error *local_err = NULL;
1305 
1306     if (bs->backing != NULL) {
1307         goto free_exit;
1308     }
1309 
1310     /* NULL means an empty set of options */
1311     if (parent_options == NULL) {
1312         tmp_parent_options = qdict_new();
1313         parent_options = tmp_parent_options;
1314     }
1315 
1316     bs->open_flags &= ~BDRV_O_NO_BACKING;
1317 
1318     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1319     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1320     g_free(bdref_key_dot);
1321 
1322     reference = qdict_get_try_str(parent_options, bdref_key);
1323     if (reference || qdict_haskey(options, "file.filename")) {
1324         backing_filename[0] = '\0';
1325     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1326         QDECREF(options);
1327         goto free_exit;
1328     } else {
1329         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1330                                        &local_err);
1331         if (local_err) {
1332             ret = -EINVAL;
1333             error_propagate(errp, local_err);
1334             QDECREF(options);
1335             goto free_exit;
1336         }
1337     }
1338 
1339     if (!bs->drv || !bs->drv->supports_backing) {
1340         ret = -EINVAL;
1341         error_setg(errp, "Driver doesn't support backing files");
1342         QDECREF(options);
1343         goto free_exit;
1344     }
1345 
1346     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1347         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1348     }
1349 
1350     backing_hd = NULL;
1351     ret = bdrv_open_inherit(&backing_hd,
1352                             *backing_filename ? backing_filename : NULL,
1353                             reference, options, 0, bs, &child_backing,
1354                             errp);
1355     if (ret < 0) {
1356         bs->open_flags |= BDRV_O_NO_BACKING;
1357         error_prepend(errp, "Could not open backing file: ");
1358         goto free_exit;
1359     }
1360 
1361     /* Hook up the backing file link; drop our reference, bs owns the
1362      * backing_hd reference now */
1363     bdrv_set_backing_hd(bs, backing_hd);
1364     bdrv_unref(backing_hd);
1365 
1366     qdict_del(parent_options, bdref_key);
1367 
1368 free_exit:
1369     g_free(backing_filename);
1370     QDECREF(tmp_parent_options);
1371     return ret;
1372 }
1373 
1374 /*
1375  * Opens a disk image whose options are given as BlockdevRef in another block
1376  * device's options.
1377  *
1378  * If allow_none is true, no image will be opened if filename is false and no
1379  * BlockdevRef is given. NULL will be returned, but errp remains unset.
1380  *
1381  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1382  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1383  * itself, all options starting with "${bdref_key}." are considered part of the
1384  * BlockdevRef.
1385  *
1386  * The BlockdevRef will be removed from the options QDict.
1387  */
1388 BdrvChild *bdrv_open_child(const char *filename,
1389                            QDict *options, const char *bdref_key,
1390                            BlockDriverState* parent,
1391                            const BdrvChildRole *child_role,
1392                            bool allow_none, Error **errp)
1393 {
1394     BdrvChild *c = NULL;
1395     BlockDriverState *bs;
1396     QDict *image_options;
1397     int ret;
1398     char *bdref_key_dot;
1399     const char *reference;
1400 
1401     assert(child_role != NULL);
1402 
1403     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1404     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1405     g_free(bdref_key_dot);
1406 
1407     reference = qdict_get_try_str(options, bdref_key);
1408     if (!filename && !reference && !qdict_size(image_options)) {
1409         if (!allow_none) {
1410             error_setg(errp, "A block device must be specified for \"%s\"",
1411                        bdref_key);
1412         }
1413         QDECREF(image_options);
1414         goto done;
1415     }
1416 
1417     bs = NULL;
1418     ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1419                             parent, child_role, errp);
1420     if (ret < 0) {
1421         goto done;
1422     }
1423 
1424     c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1425 
1426 done:
1427     qdict_del(options, bdref_key);
1428     return c;
1429 }
1430 
1431 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1432 {
1433     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1434     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1435     int64_t total_size;
1436     QemuOpts *opts = NULL;
1437     QDict *snapshot_options;
1438     BlockDriverState *bs_snapshot;
1439     Error *local_err = NULL;
1440     int ret;
1441 
1442     /* if snapshot, we create a temporary backing file and open it
1443        instead of opening 'filename' directly */
1444 
1445     /* Get the required size from the image */
1446     total_size = bdrv_getlength(bs);
1447     if (total_size < 0) {
1448         ret = total_size;
1449         error_setg_errno(errp, -total_size, "Could not get image size");
1450         goto out;
1451     }
1452 
1453     /* Create the temporary image */
1454     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1455     if (ret < 0) {
1456         error_setg_errno(errp, -ret, "Could not get temporary filename");
1457         goto out;
1458     }
1459 
1460     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1461                             &error_abort);
1462     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1463     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1464     qemu_opts_del(opts);
1465     if (ret < 0) {
1466         error_prepend(errp, "Could not create temporary overlay '%s': ",
1467                       tmp_filename);
1468         goto out;
1469     }
1470 
1471     /* Prepare a new options QDict for the temporary file */
1472     snapshot_options = qdict_new();
1473     qdict_put(snapshot_options, "file.driver",
1474               qstring_from_str("file"));
1475     qdict_put(snapshot_options, "file.filename",
1476               qstring_from_str(tmp_filename));
1477     qdict_put(snapshot_options, "driver",
1478               qstring_from_str("qcow2"));
1479 
1480     bs_snapshot = bdrv_new();
1481 
1482     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1483                     flags, &local_err);
1484     if (ret < 0) {
1485         error_propagate(errp, local_err);
1486         goto out;
1487     }
1488 
1489     bdrv_append(bs_snapshot, bs);
1490 
1491 out:
1492     g_free(tmp_filename);
1493     return ret;
1494 }
1495 
1496 /*
1497  * Opens a disk image (raw, qcow2, vmdk, ...)
1498  *
1499  * options is a QDict of options to pass to the block drivers, or NULL for an
1500  * empty set of options. The reference to the QDict belongs to the block layer
1501  * after the call (even on failure), so if the caller intends to reuse the
1502  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1503  *
1504  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1505  * If it is not NULL, the referenced BDS will be reused.
1506  *
1507  * The reference parameter may be used to specify an existing block device which
1508  * should be opened. If specified, neither options nor a filename may be given,
1509  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1510  */
1511 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1512                              const char *reference, QDict *options, int flags,
1513                              BlockDriverState *parent,
1514                              const BdrvChildRole *child_role, Error **errp)
1515 {
1516     int ret;
1517     BdrvChild *file = NULL;
1518     BlockDriverState *bs;
1519     BlockDriver *drv = NULL;
1520     const char *drvname;
1521     const char *backing;
1522     Error *local_err = NULL;
1523     int snapshot_flags = 0;
1524 
1525     assert(pbs);
1526     assert(!child_role || !flags);
1527     assert(!child_role == !parent);
1528 
1529     if (reference) {
1530         bool options_non_empty = options ? qdict_size(options) : false;
1531         QDECREF(options);
1532 
1533         if (*pbs) {
1534             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1535                        "another block device");
1536             return -EINVAL;
1537         }
1538 
1539         if (filename || options_non_empty) {
1540             error_setg(errp, "Cannot reference an existing block device with "
1541                        "additional options or a new filename");
1542             return -EINVAL;
1543         }
1544 
1545         bs = bdrv_lookup_bs(reference, reference, errp);
1546         if (!bs) {
1547             return -ENODEV;
1548         }
1549         bdrv_ref(bs);
1550         *pbs = bs;
1551         return 0;
1552     }
1553 
1554     if (*pbs) {
1555         bs = *pbs;
1556     } else {
1557         bs = bdrv_new();
1558     }
1559 
1560     /* NULL means an empty set of options */
1561     if (options == NULL) {
1562         options = qdict_new();
1563     }
1564 
1565     /* json: syntax counts as explicit options, as if in the QDict */
1566     parse_json_protocol(options, &filename, &local_err);
1567     if (local_err) {
1568         ret = -EINVAL;
1569         goto fail;
1570     }
1571 
1572     bs->explicit_options = qdict_clone_shallow(options);
1573 
1574     if (child_role) {
1575         bs->inherits_from = parent;
1576         child_role->inherit_options(&flags, options,
1577                                     parent->open_flags, parent->options);
1578     }
1579 
1580     ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1581     if (local_err) {
1582         goto fail;
1583     }
1584 
1585     bs->open_flags = flags;
1586     bs->options = options;
1587     options = qdict_clone_shallow(options);
1588 
1589     /* Find the right image format driver */
1590     drvname = qdict_get_try_str(options, "driver");
1591     if (drvname) {
1592         drv = bdrv_find_format(drvname);
1593         if (!drv) {
1594             error_setg(errp, "Unknown driver: '%s'", drvname);
1595             ret = -EINVAL;
1596             goto fail;
1597         }
1598     }
1599 
1600     assert(drvname || !(flags & BDRV_O_PROTOCOL));
1601 
1602     backing = qdict_get_try_str(options, "backing");
1603     if (backing && *backing == '\0') {
1604         flags |= BDRV_O_NO_BACKING;
1605         qdict_del(options, "backing");
1606     }
1607 
1608     /* Open image file without format layer */
1609     if ((flags & BDRV_O_PROTOCOL) == 0) {
1610         if (flags & BDRV_O_RDWR) {
1611             flags |= BDRV_O_ALLOW_RDWR;
1612         }
1613         if (flags & BDRV_O_SNAPSHOT) {
1614             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1615             bdrv_backing_options(&flags, options, flags, options);
1616         }
1617 
1618         bs->open_flags = flags;
1619 
1620         file = bdrv_open_child(filename, options, "file", bs,
1621                                &child_file, true, &local_err);
1622         if (local_err) {
1623             ret = -EINVAL;
1624             goto fail;
1625         }
1626     }
1627 
1628     /* Image format probing */
1629     bs->probed = !drv;
1630     if (!drv && file) {
1631         ret = find_image_format(file->bs, filename, &drv, &local_err);
1632         if (ret < 0) {
1633             goto fail;
1634         }
1635         /*
1636          * This option update would logically belong in bdrv_fill_options(),
1637          * but we first need to open bs->file for the probing to work, while
1638          * opening bs->file already requires the (mostly) final set of options
1639          * so that cache mode etc. can be inherited.
1640          *
1641          * Adding the driver later is somewhat ugly, but it's not an option
1642          * that would ever be inherited, so it's correct. We just need to make
1643          * sure to update both bs->options (which has the full effective
1644          * options for bs) and options (which has file.* already removed).
1645          */
1646         qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1647         qdict_put(options, "driver", qstring_from_str(drv->format_name));
1648     } else if (!drv) {
1649         error_setg(errp, "Must specify either driver or file");
1650         ret = -EINVAL;
1651         goto fail;
1652     }
1653 
1654     /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1655     assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1656     /* file must be NULL if a protocol BDS is about to be created
1657      * (the inverse results in an error message from bdrv_open_common()) */
1658     assert(!(flags & BDRV_O_PROTOCOL) || !file);
1659 
1660     /* Open the image */
1661     ret = bdrv_open_common(bs, file, options, &local_err);
1662     if (ret < 0) {
1663         goto fail;
1664     }
1665 
1666     if (file && (bs->file != file)) {
1667         bdrv_unref_child(bs, file);
1668         file = NULL;
1669     }
1670 
1671     /* If there is a backing file, use it */
1672     if ((flags & BDRV_O_NO_BACKING) == 0) {
1673         ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1674         if (ret < 0) {
1675             goto close_and_fail;
1676         }
1677     }
1678 
1679     bdrv_refresh_filename(bs);
1680 
1681     /* Check if any unknown options were used */
1682     if (options && (qdict_size(options) != 0)) {
1683         const QDictEntry *entry = qdict_first(options);
1684         if (flags & BDRV_O_PROTOCOL) {
1685             error_setg(errp, "Block protocol '%s' doesn't support the option "
1686                        "'%s'", drv->format_name, entry->key);
1687         } else {
1688             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1689                        "support the option '%s'", drv->format_name,
1690                        bdrv_get_device_name(bs), entry->key);
1691         }
1692 
1693         ret = -EINVAL;
1694         goto close_and_fail;
1695     }
1696 
1697     if (!bdrv_key_required(bs)) {
1698         if (bs->blk) {
1699             blk_dev_change_media_cb(bs->blk, true);
1700         }
1701     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1702                && !runstate_check(RUN_STATE_INMIGRATE)
1703                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1704         error_setg(errp,
1705                    "Guest must be stopped for opening of encrypted image");
1706         ret = -EBUSY;
1707         goto close_and_fail;
1708     }
1709 
1710     QDECREF(options);
1711     *pbs = bs;
1712 
1713     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1714      * temporary snapshot afterwards. */
1715     if (snapshot_flags) {
1716         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1717         if (local_err) {
1718             goto close_and_fail;
1719         }
1720     }
1721 
1722     return 0;
1723 
1724 fail:
1725     if (file != NULL) {
1726         bdrv_unref_child(bs, file);
1727     }
1728     QDECREF(bs->explicit_options);
1729     QDECREF(bs->options);
1730     QDECREF(options);
1731     bs->options = NULL;
1732     if (!*pbs) {
1733         /* If *pbs is NULL, a new BDS has been created in this function and
1734            needs to be freed now. Otherwise, it does not need to be closed,
1735            since it has not really been opened yet. */
1736         bdrv_unref(bs);
1737     }
1738     if (local_err) {
1739         error_propagate(errp, local_err);
1740     }
1741     return ret;
1742 
1743 close_and_fail:
1744     /* See fail path, but now the BDS has to be always closed */
1745     if (*pbs) {
1746         bdrv_close(bs);
1747     } else {
1748         bdrv_unref(bs);
1749     }
1750     QDECREF(options);
1751     if (local_err) {
1752         error_propagate(errp, local_err);
1753     }
1754     return ret;
1755 }
1756 
1757 int bdrv_open(BlockDriverState **pbs, const char *filename,
1758               const char *reference, QDict *options, int flags, Error **errp)
1759 {
1760     return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1761                              NULL, errp);
1762 }
1763 
1764 typedef struct BlockReopenQueueEntry {
1765      bool prepared;
1766      BDRVReopenState state;
1767      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1768 } BlockReopenQueueEntry;
1769 
1770 /*
1771  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1772  * reopen of multiple devices.
1773  *
1774  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1775  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1776  * be created and initialized. This newly created BlockReopenQueue should be
1777  * passed back in for subsequent calls that are intended to be of the same
1778  * atomic 'set'.
1779  *
1780  * bs is the BlockDriverState to add to the reopen queue.
1781  *
1782  * options contains the changed options for the associated bs
1783  * (the BlockReopenQueue takes ownership)
1784  *
1785  * flags contains the open flags for the associated bs
1786  *
1787  * returns a pointer to bs_queue, which is either the newly allocated
1788  * bs_queue, or the existing bs_queue being used.
1789  *
1790  */
1791 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1792                                                  BlockDriverState *bs,
1793                                                  QDict *options,
1794                                                  int flags,
1795                                                  const BdrvChildRole *role,
1796                                                  QDict *parent_options,
1797                                                  int parent_flags)
1798 {
1799     assert(bs != NULL);
1800 
1801     BlockReopenQueueEntry *bs_entry;
1802     BdrvChild *child;
1803     QDict *old_options, *explicit_options;
1804 
1805     if (bs_queue == NULL) {
1806         bs_queue = g_new0(BlockReopenQueue, 1);
1807         QSIMPLEQ_INIT(bs_queue);
1808     }
1809 
1810     if (!options) {
1811         options = qdict_new();
1812     }
1813 
1814     /*
1815      * Precedence of options:
1816      * 1. Explicitly passed in options (highest)
1817      * 2. Set in flags (only for top level)
1818      * 3. Retained from explicitly set options of bs
1819      * 4. Inherited from parent node
1820      * 5. Retained from effective options of bs
1821      */
1822 
1823     if (!parent_options) {
1824         /*
1825          * Any setting represented by flags is always updated. If the
1826          * corresponding QDict option is set, it takes precedence. Otherwise
1827          * the flag is translated into a QDict option. The old setting of bs is
1828          * not considered.
1829          */
1830         update_options_from_flags(options, flags);
1831     }
1832 
1833     /* Old explicitly set values (don't overwrite by inherited value) */
1834     old_options = qdict_clone_shallow(bs->explicit_options);
1835     bdrv_join_options(bs, options, old_options);
1836     QDECREF(old_options);
1837 
1838     explicit_options = qdict_clone_shallow(options);
1839 
1840     /* Inherit from parent node */
1841     if (parent_options) {
1842         assert(!flags);
1843         role->inherit_options(&flags, options, parent_flags, parent_options);
1844     }
1845 
1846     /* Old values are used for options that aren't set yet */
1847     old_options = qdict_clone_shallow(bs->options);
1848     bdrv_join_options(bs, options, old_options);
1849     QDECREF(old_options);
1850 
1851     /* bdrv_open() masks this flag out */
1852     flags &= ~BDRV_O_PROTOCOL;
1853 
1854     QLIST_FOREACH(child, &bs->children, next) {
1855         QDict *new_child_options;
1856         char *child_key_dot;
1857 
1858         /* reopen can only change the options of block devices that were
1859          * implicitly created and inherited options. For other (referenced)
1860          * block devices, a syntax like "backing.foo" results in an error. */
1861         if (child->bs->inherits_from != bs) {
1862             continue;
1863         }
1864 
1865         child_key_dot = g_strdup_printf("%s.", child->name);
1866         qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1867         g_free(child_key_dot);
1868 
1869         bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1870                                 child->role, options, flags);
1871     }
1872 
1873     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1874     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1875 
1876     bs_entry->state.bs = bs;
1877     bs_entry->state.options = options;
1878     bs_entry->state.explicit_options = explicit_options;
1879     bs_entry->state.flags = flags;
1880 
1881     return bs_queue;
1882 }
1883 
1884 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1885                                     BlockDriverState *bs,
1886                                     QDict *options, int flags)
1887 {
1888     return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1889                                    NULL, NULL, 0);
1890 }
1891 
1892 /*
1893  * Reopen multiple BlockDriverStates atomically & transactionally.
1894  *
1895  * The queue passed in (bs_queue) must have been built up previous
1896  * via bdrv_reopen_queue().
1897  *
1898  * Reopens all BDS specified in the queue, with the appropriate
1899  * flags.  All devices are prepared for reopen, and failure of any
1900  * device will cause all device changes to be abandonded, and intermediate
1901  * data cleaned up.
1902  *
1903  * If all devices prepare successfully, then the changes are committed
1904  * to all devices.
1905  *
1906  */
1907 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1908 {
1909     int ret = -1;
1910     BlockReopenQueueEntry *bs_entry, *next;
1911     Error *local_err = NULL;
1912 
1913     assert(bs_queue != NULL);
1914 
1915     bdrv_drain_all();
1916 
1917     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1918         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1919             error_propagate(errp, local_err);
1920             goto cleanup;
1921         }
1922         bs_entry->prepared = true;
1923     }
1924 
1925     /* If we reach this point, we have success and just need to apply the
1926      * changes
1927      */
1928     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1929         bdrv_reopen_commit(&bs_entry->state);
1930     }
1931 
1932     ret = 0;
1933 
1934 cleanup:
1935     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1936         if (ret && bs_entry->prepared) {
1937             bdrv_reopen_abort(&bs_entry->state);
1938         } else if (ret) {
1939             QDECREF(bs_entry->state.explicit_options);
1940         }
1941         QDECREF(bs_entry->state.options);
1942         g_free(bs_entry);
1943     }
1944     g_free(bs_queue);
1945     return ret;
1946 }
1947 
1948 
1949 /* Reopen a single BlockDriverState with the specified flags. */
1950 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1951 {
1952     int ret = -1;
1953     Error *local_err = NULL;
1954     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1955 
1956     ret = bdrv_reopen_multiple(queue, &local_err);
1957     if (local_err != NULL) {
1958         error_propagate(errp, local_err);
1959     }
1960     return ret;
1961 }
1962 
1963 
1964 /*
1965  * Prepares a BlockDriverState for reopen. All changes are staged in the
1966  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1967  * the block driver layer .bdrv_reopen_prepare()
1968  *
1969  * bs is the BlockDriverState to reopen
1970  * flags are the new open flags
1971  * queue is the reopen queue
1972  *
1973  * Returns 0 on success, non-zero on error.  On error errp will be set
1974  * as well.
1975  *
1976  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1977  * It is the responsibility of the caller to then call the abort() or
1978  * commit() for any other BDS that have been left in a prepare() state
1979  *
1980  */
1981 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1982                         Error **errp)
1983 {
1984     int ret = -1;
1985     Error *local_err = NULL;
1986     BlockDriver *drv;
1987     QemuOpts *opts;
1988     const char *value;
1989 
1990     assert(reopen_state != NULL);
1991     assert(reopen_state->bs->drv != NULL);
1992     drv = reopen_state->bs->drv;
1993 
1994     /* Process generic block layer options */
1995     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1996     qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1997     if (local_err) {
1998         error_propagate(errp, local_err);
1999         ret = -EINVAL;
2000         goto error;
2001     }
2002 
2003     update_flags_from_options(&reopen_state->flags, opts);
2004 
2005     /* If a guest device is attached, it owns WCE */
2006     if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2007         bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2008         bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2009         if (old_wce != new_wce) {
2010             error_setg(errp, "Cannot change cache.writeback: Device attached");
2011             ret = -EINVAL;
2012             goto error;
2013         }
2014     }
2015 
2016     /* node-name and driver must be unchanged. Put them back into the QDict, so
2017      * that they are checked at the end of this function. */
2018     value = qemu_opt_get(opts, "node-name");
2019     if (value) {
2020         qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2021     }
2022 
2023     value = qemu_opt_get(opts, "driver");
2024     if (value) {
2025         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2026     }
2027 
2028     /* if we are to stay read-only, do not allow permission change
2029      * to r/w */
2030     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2031         reopen_state->flags & BDRV_O_RDWR) {
2032         error_setg(errp, "Node '%s' is read only",
2033                    bdrv_get_device_or_node_name(reopen_state->bs));
2034         goto error;
2035     }
2036 
2037 
2038     ret = bdrv_flush(reopen_state->bs);
2039     if (ret) {
2040         error_setg_errno(errp, -ret, "Error flushing drive");
2041         goto error;
2042     }
2043 
2044     if (drv->bdrv_reopen_prepare) {
2045         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2046         if (ret) {
2047             if (local_err != NULL) {
2048                 error_propagate(errp, local_err);
2049             } else {
2050                 error_setg(errp, "failed while preparing to reopen image '%s'",
2051                            reopen_state->bs->filename);
2052             }
2053             goto error;
2054         }
2055     } else {
2056         /* It is currently mandatory to have a bdrv_reopen_prepare()
2057          * handler for each supported drv. */
2058         error_setg(errp, "Block format '%s' used by node '%s' "
2059                    "does not support reopening files", drv->format_name,
2060                    bdrv_get_device_or_node_name(reopen_state->bs));
2061         ret = -1;
2062         goto error;
2063     }
2064 
2065     /* Options that are not handled are only okay if they are unchanged
2066      * compared to the old state. It is expected that some options are only
2067      * used for the initial open, but not reopen (e.g. filename) */
2068     if (qdict_size(reopen_state->options)) {
2069         const QDictEntry *entry = qdict_first(reopen_state->options);
2070 
2071         do {
2072             QString *new_obj = qobject_to_qstring(entry->value);
2073             const char *new = qstring_get_str(new_obj);
2074             const char *old = qdict_get_try_str(reopen_state->bs->options,
2075                                                 entry->key);
2076 
2077             if (!old || strcmp(new, old)) {
2078                 error_setg(errp, "Cannot change the option '%s'", entry->key);
2079                 ret = -EINVAL;
2080                 goto error;
2081             }
2082         } while ((entry = qdict_next(reopen_state->options, entry)));
2083     }
2084 
2085     ret = 0;
2086 
2087 error:
2088     qemu_opts_del(opts);
2089     return ret;
2090 }
2091 
2092 /*
2093  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2094  * makes them final by swapping the staging BlockDriverState contents into
2095  * the active BlockDriverState contents.
2096  */
2097 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2098 {
2099     BlockDriver *drv;
2100 
2101     assert(reopen_state != NULL);
2102     drv = reopen_state->bs->drv;
2103     assert(drv != NULL);
2104 
2105     /* If there are any driver level actions to take */
2106     if (drv->bdrv_reopen_commit) {
2107         drv->bdrv_reopen_commit(reopen_state);
2108     }
2109 
2110     /* set BDS specific flags now */
2111     QDECREF(reopen_state->bs->explicit_options);
2112 
2113     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2114     reopen_state->bs->open_flags         = reopen_state->flags;
2115     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2116                                               BDRV_O_CACHE_WB);
2117     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2118 
2119     bdrv_refresh_limits(reopen_state->bs, NULL);
2120 }
2121 
2122 /*
2123  * Abort the reopen, and delete and free the staged changes in
2124  * reopen_state
2125  */
2126 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2127 {
2128     BlockDriver *drv;
2129 
2130     assert(reopen_state != NULL);
2131     drv = reopen_state->bs->drv;
2132     assert(drv != NULL);
2133 
2134     if (drv->bdrv_reopen_abort) {
2135         drv->bdrv_reopen_abort(reopen_state);
2136     }
2137 
2138     QDECREF(reopen_state->explicit_options);
2139 }
2140 
2141 
2142 static void bdrv_close(BlockDriverState *bs)
2143 {
2144     BdrvAioNotifier *ban, *ban_next;
2145 
2146     assert(!bs->job);
2147 
2148     /* Disable I/O limits and drain all pending throttled requests */
2149     if (bs->throttle_state) {
2150         bdrv_io_limits_disable(bs);
2151     }
2152 
2153     bdrv_drained_begin(bs); /* complete I/O */
2154     bdrv_flush(bs);
2155     bdrv_drain(bs); /* in case flush left pending I/O */
2156 
2157     bdrv_release_named_dirty_bitmaps(bs);
2158     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2159 
2160     if (bs->blk) {
2161         blk_dev_change_media_cb(bs->blk, false);
2162     }
2163 
2164     if (bs->drv) {
2165         BdrvChild *child, *next;
2166 
2167         bs->drv->bdrv_close(bs);
2168         bs->drv = NULL;
2169 
2170         bdrv_set_backing_hd(bs, NULL);
2171 
2172         if (bs->file != NULL) {
2173             bdrv_unref_child(bs, bs->file);
2174             bs->file = NULL;
2175         }
2176 
2177         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2178             /* TODO Remove bdrv_unref() from drivers' close function and use
2179              * bdrv_unref_child() here */
2180             if (child->bs->inherits_from == bs) {
2181                 child->bs->inherits_from = NULL;
2182             }
2183             bdrv_detach_child(child);
2184         }
2185 
2186         g_free(bs->opaque);
2187         bs->opaque = NULL;
2188         bs->copy_on_read = 0;
2189         bs->backing_file[0] = '\0';
2190         bs->backing_format[0] = '\0';
2191         bs->total_sectors = 0;
2192         bs->encrypted = 0;
2193         bs->valid_key = 0;
2194         bs->sg = 0;
2195         bs->zero_beyond_eof = false;
2196         QDECREF(bs->options);
2197         QDECREF(bs->explicit_options);
2198         bs->options = NULL;
2199         QDECREF(bs->full_open_options);
2200         bs->full_open_options = NULL;
2201     }
2202 
2203     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2204         g_free(ban);
2205     }
2206     QLIST_INIT(&bs->aio_notifiers);
2207     bdrv_drained_end(bs);
2208 }
2209 
2210 void bdrv_close_all(void)
2211 {
2212     BlockDriverState *bs;
2213     AioContext *aio_context;
2214 
2215     /* Drop references from requests still in flight, such as canceled block
2216      * jobs whose AIO context has not been polled yet */
2217     bdrv_drain_all();
2218 
2219     blk_remove_all_bs();
2220     blockdev_close_all_bdrv_states();
2221 
2222     /* Cancel all block jobs */
2223     while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2224         QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2225             aio_context = bdrv_get_aio_context(bs);
2226 
2227             aio_context_acquire(aio_context);
2228             if (bs->job) {
2229                 block_job_cancel_sync(bs->job);
2230                 aio_context_release(aio_context);
2231                 break;
2232             }
2233             aio_context_release(aio_context);
2234         }
2235 
2236         /* All the remaining BlockDriverStates are referenced directly or
2237          * indirectly from block jobs, so there needs to be at least one BDS
2238          * directly used by a block job */
2239         assert(bs);
2240     }
2241 }
2242 
2243 /* Note that bs->device_list.tqe_prev is initially null,
2244  * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2245  * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2246  * resetting it to null on remove.  */
2247 void bdrv_device_remove(BlockDriverState *bs)
2248 {
2249     QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2250     bs->device_list.tqe_prev = NULL;
2251 }
2252 
2253 /* make a BlockDriverState anonymous by removing from bdrv_state and
2254  * graph_bdrv_state list.
2255    Also, NULL terminate the device_name to prevent double remove */
2256 void bdrv_make_anon(BlockDriverState *bs)
2257 {
2258     /* Take care to remove bs from bdrv_states only when it's actually
2259      * in it. */
2260     if (bs->device_list.tqe_prev) {
2261         bdrv_device_remove(bs);
2262     }
2263     if (bs->node_name[0] != '\0') {
2264         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2265     }
2266     bs->node_name[0] = '\0';
2267 }
2268 
2269 /* Fields that need to stay with the top-level BDS */
2270 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2271                                      BlockDriverState *bs_src)
2272 {
2273     /* move some fields that need to stay attached to the device */
2274 
2275     /* dev info */
2276     bs_dest->copy_on_read       = bs_src->copy_on_read;
2277 
2278     bs_dest->enable_write_cache = bs_src->enable_write_cache;
2279 
2280     /* dirty bitmap */
2281     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2282 }
2283 
2284 static void change_parent_backing_link(BlockDriverState *from,
2285                                        BlockDriverState *to)
2286 {
2287     BdrvChild *c, *next;
2288 
2289     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2290         assert(c->role != &child_backing);
2291         c->bs = to;
2292         QLIST_REMOVE(c, next_parent);
2293         QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2294         bdrv_ref(to);
2295         bdrv_unref(from);
2296     }
2297     if (from->blk) {
2298         blk_set_bs(from->blk, to);
2299         if (!to->device_list.tqe_prev) {
2300             QTAILQ_INSERT_BEFORE(from, to, device_list);
2301         }
2302         bdrv_device_remove(from);
2303     }
2304 }
2305 
2306 static void swap_feature_fields(BlockDriverState *bs_top,
2307                                 BlockDriverState *bs_new)
2308 {
2309     BlockDriverState tmp;
2310 
2311     bdrv_move_feature_fields(&tmp, bs_top);
2312     bdrv_move_feature_fields(bs_top, bs_new);
2313     bdrv_move_feature_fields(bs_new, &tmp);
2314 
2315     assert(!bs_new->throttle_state);
2316     if (bs_top->throttle_state) {
2317         assert(bs_top->io_limits_enabled);
2318         bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2319         bdrv_io_limits_disable(bs_top);
2320     }
2321 }
2322 
2323 /*
2324  * Add new bs contents at the top of an image chain while the chain is
2325  * live, while keeping required fields on the top layer.
2326  *
2327  * This will modify the BlockDriverState fields, and swap contents
2328  * between bs_new and bs_top. Both bs_new and bs_top are modified.
2329  *
2330  * bs_new must not be attached to a BlockBackend.
2331  *
2332  * This function does not create any image files.
2333  *
2334  * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2335  * that's what the callers commonly need. bs_new will be referenced by the old
2336  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2337  * reference of its own, it must call bdrv_ref().
2338  */
2339 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2340 {
2341     assert(!bdrv_requests_pending(bs_top));
2342     assert(!bdrv_requests_pending(bs_new));
2343 
2344     bdrv_ref(bs_top);
2345     change_parent_backing_link(bs_top, bs_new);
2346 
2347     /* Some fields always stay on top of the backing file chain */
2348     swap_feature_fields(bs_top, bs_new);
2349 
2350     bdrv_set_backing_hd(bs_new, bs_top);
2351     bdrv_unref(bs_top);
2352 
2353     /* bs_new is now referenced by its new parents, we don't need the
2354      * additional reference any more. */
2355     bdrv_unref(bs_new);
2356 }
2357 
2358 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2359 {
2360     assert(!bdrv_requests_pending(old));
2361     assert(!bdrv_requests_pending(new));
2362 
2363     bdrv_ref(old);
2364 
2365     if (old->blk) {
2366         /* As long as these fields aren't in BlockBackend, but in the top-level
2367          * BlockDriverState, it's not possible for a BDS to have two BBs.
2368          *
2369          * We really want to copy the fields from old to new, but we go for a
2370          * swap instead so that pointers aren't duplicated and cause trouble.
2371          * (Also, bdrv_swap() used to do the same.) */
2372         assert(!new->blk);
2373         swap_feature_fields(old, new);
2374     }
2375     change_parent_backing_link(old, new);
2376 
2377     /* Change backing files if a previously independent node is added to the
2378      * chain. For active commit, we replace top by its own (indirect) backing
2379      * file and don't do anything here so we don't build a loop. */
2380     if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2381         bdrv_set_backing_hd(new, backing_bs(old));
2382         bdrv_set_backing_hd(old, NULL);
2383     }
2384 
2385     bdrv_unref(old);
2386 }
2387 
2388 static void bdrv_delete(BlockDriverState *bs)
2389 {
2390     assert(!bs->job);
2391     assert(bdrv_op_blocker_is_empty(bs));
2392     assert(!bs->refcnt);
2393 
2394     bdrv_close(bs);
2395 
2396     /* remove from list, if necessary */
2397     bdrv_make_anon(bs);
2398 
2399     QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2400 
2401     g_free(bs);
2402 }
2403 
2404 /*
2405  * Run consistency checks on an image
2406  *
2407  * Returns 0 if the check could be completed (it doesn't mean that the image is
2408  * free of errors) or -errno when an internal error occurred. The results of the
2409  * check are stored in res.
2410  */
2411 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2412 {
2413     if (bs->drv == NULL) {
2414         return -ENOMEDIUM;
2415     }
2416     if (bs->drv->bdrv_check == NULL) {
2417         return -ENOTSUP;
2418     }
2419 
2420     memset(res, 0, sizeof(*res));
2421     return bs->drv->bdrv_check(bs, res, fix);
2422 }
2423 
2424 #define COMMIT_BUF_SECTORS 2048
2425 
2426 /* commit COW file into the raw image */
2427 int bdrv_commit(BlockDriverState *bs)
2428 {
2429     BlockDriver *drv = bs->drv;
2430     int64_t sector, total_sectors, length, backing_length;
2431     int n, ro, open_flags;
2432     int ret = 0;
2433     uint8_t *buf = NULL;
2434 
2435     if (!drv)
2436         return -ENOMEDIUM;
2437 
2438     if (!bs->backing) {
2439         return -ENOTSUP;
2440     }
2441 
2442     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2443         bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2444         return -EBUSY;
2445     }
2446 
2447     ro = bs->backing->bs->read_only;
2448     open_flags =  bs->backing->bs->open_flags;
2449 
2450     if (ro) {
2451         if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2452             return -EACCES;
2453         }
2454     }
2455 
2456     length = bdrv_getlength(bs);
2457     if (length < 0) {
2458         ret = length;
2459         goto ro_cleanup;
2460     }
2461 
2462     backing_length = bdrv_getlength(bs->backing->bs);
2463     if (backing_length < 0) {
2464         ret = backing_length;
2465         goto ro_cleanup;
2466     }
2467 
2468     /* If our top snapshot is larger than the backing file image,
2469      * grow the backing file image if possible.  If not possible,
2470      * we must return an error */
2471     if (length > backing_length) {
2472         ret = bdrv_truncate(bs->backing->bs, length);
2473         if (ret < 0) {
2474             goto ro_cleanup;
2475         }
2476     }
2477 
2478     total_sectors = length >> BDRV_SECTOR_BITS;
2479 
2480     /* qemu_try_blockalign() for bs will choose an alignment that works for
2481      * bs->backing->bs as well, so no need to compare the alignment manually. */
2482     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2483     if (buf == NULL) {
2484         ret = -ENOMEM;
2485         goto ro_cleanup;
2486     }
2487 
2488     for (sector = 0; sector < total_sectors; sector += n) {
2489         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2490         if (ret < 0) {
2491             goto ro_cleanup;
2492         }
2493         if (ret) {
2494             ret = bdrv_read(bs, sector, buf, n);
2495             if (ret < 0) {
2496                 goto ro_cleanup;
2497             }
2498 
2499             ret = bdrv_write(bs->backing->bs, sector, buf, n);
2500             if (ret < 0) {
2501                 goto ro_cleanup;
2502             }
2503         }
2504     }
2505 
2506     if (drv->bdrv_make_empty) {
2507         ret = drv->bdrv_make_empty(bs);
2508         if (ret < 0) {
2509             goto ro_cleanup;
2510         }
2511         bdrv_flush(bs);
2512     }
2513 
2514     /*
2515      * Make sure all data we wrote to the backing device is actually
2516      * stable on disk.
2517      */
2518     if (bs->backing) {
2519         bdrv_flush(bs->backing->bs);
2520     }
2521 
2522     ret = 0;
2523 ro_cleanup:
2524     qemu_vfree(buf);
2525 
2526     if (ro) {
2527         /* ignoring error return here */
2528         bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2529     }
2530 
2531     return ret;
2532 }
2533 
2534 int bdrv_commit_all(void)
2535 {
2536     BlockDriverState *bs;
2537 
2538     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2539         AioContext *aio_context = bdrv_get_aio_context(bs);
2540 
2541         aio_context_acquire(aio_context);
2542         if (bs->drv && bs->backing) {
2543             int ret = bdrv_commit(bs);
2544             if (ret < 0) {
2545                 aio_context_release(aio_context);
2546                 return ret;
2547             }
2548         }
2549         aio_context_release(aio_context);
2550     }
2551     return 0;
2552 }
2553 
2554 /*
2555  * Return values:
2556  * 0        - success
2557  * -EINVAL  - backing format specified, but no file
2558  * -ENOSPC  - can't update the backing file because no space is left in the
2559  *            image file header
2560  * -ENOTSUP - format driver doesn't support changing the backing file
2561  */
2562 int bdrv_change_backing_file(BlockDriverState *bs,
2563     const char *backing_file, const char *backing_fmt)
2564 {
2565     BlockDriver *drv = bs->drv;
2566     int ret;
2567 
2568     /* Backing file format doesn't make sense without a backing file */
2569     if (backing_fmt && !backing_file) {
2570         return -EINVAL;
2571     }
2572 
2573     if (drv->bdrv_change_backing_file != NULL) {
2574         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2575     } else {
2576         ret = -ENOTSUP;
2577     }
2578 
2579     if (ret == 0) {
2580         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2581         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2582     }
2583     return ret;
2584 }
2585 
2586 /*
2587  * Finds the image layer in the chain that has 'bs' as its backing file.
2588  *
2589  * active is the current topmost image.
2590  *
2591  * Returns NULL if bs is not found in active's image chain,
2592  * or if active == bs.
2593  *
2594  * Returns the bottommost base image if bs == NULL.
2595  */
2596 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2597                                     BlockDriverState *bs)
2598 {
2599     while (active && bs != backing_bs(active)) {
2600         active = backing_bs(active);
2601     }
2602 
2603     return active;
2604 }
2605 
2606 /* Given a BDS, searches for the base layer. */
2607 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2608 {
2609     return bdrv_find_overlay(bs, NULL);
2610 }
2611 
2612 /*
2613  * Drops images above 'base' up to and including 'top', and sets the image
2614  * above 'top' to have base as its backing file.
2615  *
2616  * Requires that the overlay to 'top' is opened r/w, so that the backing file
2617  * information in 'bs' can be properly updated.
2618  *
2619  * E.g., this will convert the following chain:
2620  * bottom <- base <- intermediate <- top <- active
2621  *
2622  * to
2623  *
2624  * bottom <- base <- active
2625  *
2626  * It is allowed for bottom==base, in which case it converts:
2627  *
2628  * base <- intermediate <- top <- active
2629  *
2630  * to
2631  *
2632  * base <- active
2633  *
2634  * If backing_file_str is non-NULL, it will be used when modifying top's
2635  * overlay image metadata.
2636  *
2637  * Error conditions:
2638  *  if active == top, that is considered an error
2639  *
2640  */
2641 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2642                            BlockDriverState *base, const char *backing_file_str)
2643 {
2644     BlockDriverState *new_top_bs = NULL;
2645     int ret = -EIO;
2646 
2647     if (!top->drv || !base->drv) {
2648         goto exit;
2649     }
2650 
2651     new_top_bs = bdrv_find_overlay(active, top);
2652 
2653     if (new_top_bs == NULL) {
2654         /* we could not find the image above 'top', this is an error */
2655         goto exit;
2656     }
2657 
2658     /* special case of new_top_bs->backing->bs already pointing to base - nothing
2659      * to do, no intermediate images */
2660     if (backing_bs(new_top_bs) == base) {
2661         ret = 0;
2662         goto exit;
2663     }
2664 
2665     /* Make sure that base is in the backing chain of top */
2666     if (!bdrv_chain_contains(top, base)) {
2667         goto exit;
2668     }
2669 
2670     /* success - we can delete the intermediate states, and link top->base */
2671     backing_file_str = backing_file_str ? backing_file_str : base->filename;
2672     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2673                                    base->drv ? base->drv->format_name : "");
2674     if (ret) {
2675         goto exit;
2676     }
2677     bdrv_set_backing_hd(new_top_bs, base);
2678 
2679     ret = 0;
2680 exit:
2681     return ret;
2682 }
2683 
2684 /**
2685  * Truncate file to 'offset' bytes (needed only for file protocols)
2686  */
2687 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2688 {
2689     BlockDriver *drv = bs->drv;
2690     int ret;
2691     if (!drv)
2692         return -ENOMEDIUM;
2693     if (!drv->bdrv_truncate)
2694         return -ENOTSUP;
2695     if (bs->read_only)
2696         return -EACCES;
2697 
2698     ret = drv->bdrv_truncate(bs, offset);
2699     if (ret == 0) {
2700         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2701         bdrv_dirty_bitmap_truncate(bs);
2702         if (bs->blk) {
2703             blk_dev_resize_cb(bs->blk);
2704         }
2705     }
2706     return ret;
2707 }
2708 
2709 /**
2710  * Length of a allocated file in bytes. Sparse files are counted by actual
2711  * allocated space. Return < 0 if error or unknown.
2712  */
2713 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2714 {
2715     BlockDriver *drv = bs->drv;
2716     if (!drv) {
2717         return -ENOMEDIUM;
2718     }
2719     if (drv->bdrv_get_allocated_file_size) {
2720         return drv->bdrv_get_allocated_file_size(bs);
2721     }
2722     if (bs->file) {
2723         return bdrv_get_allocated_file_size(bs->file->bs);
2724     }
2725     return -ENOTSUP;
2726 }
2727 
2728 /**
2729  * Return number of sectors on success, -errno on error.
2730  */
2731 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2732 {
2733     BlockDriver *drv = bs->drv;
2734 
2735     if (!drv)
2736         return -ENOMEDIUM;
2737 
2738     if (drv->has_variable_length) {
2739         int ret = refresh_total_sectors(bs, bs->total_sectors);
2740         if (ret < 0) {
2741             return ret;
2742         }
2743     }
2744     return bs->total_sectors;
2745 }
2746 
2747 /**
2748  * Return length in bytes on success, -errno on error.
2749  * The length is always a multiple of BDRV_SECTOR_SIZE.
2750  */
2751 int64_t bdrv_getlength(BlockDriverState *bs)
2752 {
2753     int64_t ret = bdrv_nb_sectors(bs);
2754 
2755     ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2756     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2757 }
2758 
2759 /* return 0 as number of sectors if no device present or error */
2760 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2761 {
2762     int64_t nb_sectors = bdrv_nb_sectors(bs);
2763 
2764     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2765 }
2766 
2767 int bdrv_is_read_only(BlockDriverState *bs)
2768 {
2769     return bs->read_only;
2770 }
2771 
2772 int bdrv_is_sg(BlockDriverState *bs)
2773 {
2774     return bs->sg;
2775 }
2776 
2777 int bdrv_enable_write_cache(BlockDriverState *bs)
2778 {
2779     return bs->enable_write_cache;
2780 }
2781 
2782 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2783 {
2784     bs->enable_write_cache = wce;
2785 
2786     /* so a reopen() will preserve wce */
2787     if (wce) {
2788         bs->open_flags |= BDRV_O_CACHE_WB;
2789     } else {
2790         bs->open_flags &= ~BDRV_O_CACHE_WB;
2791     }
2792 }
2793 
2794 int bdrv_is_encrypted(BlockDriverState *bs)
2795 {
2796     if (bs->backing && bs->backing->bs->encrypted) {
2797         return 1;
2798     }
2799     return bs->encrypted;
2800 }
2801 
2802 int bdrv_key_required(BlockDriverState *bs)
2803 {
2804     BdrvChild *backing = bs->backing;
2805 
2806     if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2807         return 1;
2808     }
2809     return (bs->encrypted && !bs->valid_key);
2810 }
2811 
2812 int bdrv_set_key(BlockDriverState *bs, const char *key)
2813 {
2814     int ret;
2815     if (bs->backing && bs->backing->bs->encrypted) {
2816         ret = bdrv_set_key(bs->backing->bs, key);
2817         if (ret < 0)
2818             return ret;
2819         if (!bs->encrypted)
2820             return 0;
2821     }
2822     if (!bs->encrypted) {
2823         return -EINVAL;
2824     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2825         return -ENOMEDIUM;
2826     }
2827     ret = bs->drv->bdrv_set_key(bs, key);
2828     if (ret < 0) {
2829         bs->valid_key = 0;
2830     } else if (!bs->valid_key) {
2831         bs->valid_key = 1;
2832         if (bs->blk) {
2833             /* call the change callback now, we skipped it on open */
2834             blk_dev_change_media_cb(bs->blk, true);
2835         }
2836     }
2837     return ret;
2838 }
2839 
2840 /*
2841  * Provide an encryption key for @bs.
2842  * If @key is non-null:
2843  *     If @bs is not encrypted, fail.
2844  *     Else if the key is invalid, fail.
2845  *     Else set @bs's key to @key, replacing the existing key, if any.
2846  * If @key is null:
2847  *     If @bs is encrypted and still lacks a key, fail.
2848  *     Else do nothing.
2849  * On failure, store an error object through @errp if non-null.
2850  */
2851 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2852 {
2853     if (key) {
2854         if (!bdrv_is_encrypted(bs)) {
2855             error_setg(errp, "Node '%s' is not encrypted",
2856                       bdrv_get_device_or_node_name(bs));
2857         } else if (bdrv_set_key(bs, key) < 0) {
2858             error_setg(errp, QERR_INVALID_PASSWORD);
2859         }
2860     } else {
2861         if (bdrv_key_required(bs)) {
2862             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2863                       "'%s' (%s) is encrypted",
2864                       bdrv_get_device_or_node_name(bs),
2865                       bdrv_get_encrypted_filename(bs));
2866         }
2867     }
2868 }
2869 
2870 const char *bdrv_get_format_name(BlockDriverState *bs)
2871 {
2872     return bs->drv ? bs->drv->format_name : NULL;
2873 }
2874 
/*
 * qsort() comparator for an array whose elements are C strings
 * (i.e. an array of "const char *").
 *
 * qsort() passes pointers to the array *elements*, so @a and @b are really
 * "const char *const *".  They must be dereferenced once before being
 * handed to strcmp(); comparing them directly would compare the bytes of
 * the pointer values instead of the strings they refer to.
 *
 * Returns <0, 0 or >0 following strcmp() on the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2879 
2880 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2881                          void *opaque)
2882 {
2883     BlockDriver *drv;
2884     int count = 0;
2885     int i;
2886     const char **formats = NULL;
2887 
2888     QLIST_FOREACH(drv, &bdrv_drivers, list) {
2889         if (drv->format_name) {
2890             bool found = false;
2891             int i = count;
2892             while (formats && i && !found) {
2893                 found = !strcmp(formats[--i], drv->format_name);
2894             }
2895 
2896             if (!found) {
2897                 formats = g_renew(const char *, formats, count + 1);
2898                 formats[count++] = drv->format_name;
2899             }
2900         }
2901     }
2902 
2903     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2904 
2905     for (i = 0; i < count; i++) {
2906         it(opaque, formats[i]);
2907     }
2908 
2909     g_free(formats);
2910 }
2911 
2912 /* This function is to find a node in the bs graph */
2913 BlockDriverState *bdrv_find_node(const char *node_name)
2914 {
2915     BlockDriverState *bs;
2916 
2917     assert(node_name);
2918 
2919     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2920         if (!strcmp(node_name, bs->node_name)) {
2921             return bs;
2922         }
2923     }
2924     return NULL;
2925 }
2926 
2927 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2928 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2929 {
2930     BlockDeviceInfoList *list, *entry;
2931     BlockDriverState *bs;
2932 
2933     list = NULL;
2934     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2935         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2936         if (!info) {
2937             qapi_free_BlockDeviceInfoList(list);
2938             return NULL;
2939         }
2940         entry = g_malloc0(sizeof(*entry));
2941         entry->value = info;
2942         entry->next = list;
2943         list = entry;
2944     }
2945 
2946     return list;
2947 }
2948 
2949 BlockDriverState *bdrv_lookup_bs(const char *device,
2950                                  const char *node_name,
2951                                  Error **errp)
2952 {
2953     BlockBackend *blk;
2954     BlockDriverState *bs;
2955 
2956     if (device) {
2957         blk = blk_by_name(device);
2958 
2959         if (blk) {
2960             bs = blk_bs(blk);
2961             if (!bs) {
2962                 error_setg(errp, "Device '%s' has no medium", device);
2963             }
2964 
2965             return bs;
2966         }
2967     }
2968 
2969     if (node_name) {
2970         bs = bdrv_find_node(node_name);
2971 
2972         if (bs) {
2973             return bs;
2974         }
2975     }
2976 
2977     error_setg(errp, "Cannot find device=%s nor node_name=%s",
2978                      device ? device : "",
2979                      node_name ? node_name : "");
2980     return NULL;
2981 }
2982 
2983 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2984  * return false.  If either argument is NULL, return false. */
2985 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2986 {
2987     while (top && top != base) {
2988         top = backing_bs(top);
2989     }
2990 
2991     return top != NULL;
2992 }
2993 
2994 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2995 {
2996     if (!bs) {
2997         return QTAILQ_FIRST(&graph_bdrv_states);
2998     }
2999     return QTAILQ_NEXT(bs, node_list);
3000 }
3001 
3002 BlockDriverState *bdrv_next(BlockDriverState *bs)
3003 {
3004     if (!bs) {
3005         return QTAILQ_FIRST(&bdrv_states);
3006     }
3007     return QTAILQ_NEXT(bs, device_list);
3008 }
3009 
3010 const char *bdrv_get_node_name(const BlockDriverState *bs)
3011 {
3012     return bs->node_name;
3013 }
3014 
3015 /* TODO check what callers really want: bs->node_name or blk_name() */
3016 const char *bdrv_get_device_name(const BlockDriverState *bs)
3017 {
3018     return bs->blk ? blk_name(bs->blk) : "";
3019 }
3020 
3021 /* This can be used to identify nodes that might not have a device
3022  * name associated. Since node and device names live in the same
3023  * namespace, the result is unambiguous. The exception is if both are
3024  * absent, then this returns an empty (non-null) string. */
3025 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3026 {
3027     return bs->blk ? blk_name(bs->blk) : bs->node_name;
3028 }
3029 
3030 int bdrv_get_flags(BlockDriverState *bs)
3031 {
3032     return bs->open_flags;
3033 }
3034 
3035 int bdrv_has_zero_init_1(BlockDriverState *bs)
3036 {
3037     return 1;
3038 }
3039 
3040 int bdrv_has_zero_init(BlockDriverState *bs)
3041 {
3042     assert(bs->drv);
3043 
3044     /* If BS is a copy on write image, it is initialized to
3045        the contents of the base image, which may not be zeroes.  */
3046     if (bs->backing) {
3047         return 0;
3048     }
3049     if (bs->drv->bdrv_has_zero_init) {
3050         return bs->drv->bdrv_has_zero_init(bs);
3051     }
3052 
3053     /* safe default */
3054     return 0;
3055 }
3056 
3057 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3058 {
3059     BlockDriverInfo bdi;
3060 
3061     if (bs->backing) {
3062         return false;
3063     }
3064 
3065     if (bdrv_get_info(bs, &bdi) == 0) {
3066         return bdi.unallocated_blocks_are_zero;
3067     }
3068 
3069     return false;
3070 }
3071 
3072 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3073 {
3074     BlockDriverInfo bdi;
3075 
3076     if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3077         return false;
3078     }
3079 
3080     if (bdrv_get_info(bs, &bdi) == 0) {
3081         return bdi.can_write_zeroes_with_unmap;
3082     }
3083 
3084     return false;
3085 }
3086 
3087 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3088 {
3089     if (bs->backing && bs->backing->bs->encrypted)
3090         return bs->backing_file;
3091     else if (bs->encrypted)
3092         return bs->filename;
3093     else
3094         return NULL;
3095 }
3096 
3097 void bdrv_get_backing_filename(BlockDriverState *bs,
3098                                char *filename, int filename_size)
3099 {
3100     pstrcpy(filename, filename_size, bs->backing_file);
3101 }
3102 
3103 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3104 {
3105     BlockDriver *drv = bs->drv;
3106     if (!drv)
3107         return -ENOMEDIUM;
3108     if (!drv->bdrv_get_info)
3109         return -ENOTSUP;
3110     memset(bdi, 0, sizeof(*bdi));
3111     return drv->bdrv_get_info(bs, bdi);
3112 }
3113 
3114 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3115 {
3116     BlockDriver *drv = bs->drv;
3117     if (drv && drv->bdrv_get_specific_info) {
3118         return drv->bdrv_get_specific_info(bs);
3119     }
3120     return NULL;
3121 }
3122 
3123 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3124 {
3125     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3126         return;
3127     }
3128 
3129     bs->drv->bdrv_debug_event(bs, event);
3130 }
3131 
3132 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3133                           const char *tag)
3134 {
3135     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3136         bs = bs->file ? bs->file->bs : NULL;
3137     }
3138 
3139     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3140         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3141     }
3142 
3143     return -ENOTSUP;
3144 }
3145 
3146 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3147 {
3148     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3149         bs = bs->file ? bs->file->bs : NULL;
3150     }
3151 
3152     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3153         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3154     }
3155 
3156     return -ENOTSUP;
3157 }
3158 
3159 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3160 {
3161     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3162         bs = bs->file ? bs->file->bs : NULL;
3163     }
3164 
3165     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3166         return bs->drv->bdrv_debug_resume(bs, tag);
3167     }
3168 
3169     return -ENOTSUP;
3170 }
3171 
3172 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3173 {
3174     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3175         bs = bs->file ? bs->file->bs : NULL;
3176     }
3177 
3178     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3179         return bs->drv->bdrv_debug_is_suspended(bs, tag);
3180     }
3181 
3182     return false;
3183 }
3184 
3185 int bdrv_is_snapshot(BlockDriverState *bs)
3186 {
3187     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3188 }
3189 
3190 /* backing_file can either be relative, or absolute, or a protocol.  If it is
3191  * relative, it must be relative to the chain.  So, passing in bs->filename
3192  * from a BDS as backing_file should not be done, as that may be relative to
3193  * the CWD rather than the chain. */
3194 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3195         const char *backing_file)
3196 {
3197     char *filename_full = NULL;
3198     char *backing_file_full = NULL;
3199     char *filename_tmp = NULL;
3200     int is_protocol = 0;
3201     BlockDriverState *curr_bs = NULL;
3202     BlockDriverState *retval = NULL;
3203 
3204     if (!bs || !bs->drv || !backing_file) {
3205         return NULL;
3206     }
3207 
3208     filename_full     = g_malloc(PATH_MAX);
3209     backing_file_full = g_malloc(PATH_MAX);
3210     filename_tmp      = g_malloc(PATH_MAX);
3211 
3212     is_protocol = path_has_protocol(backing_file);
3213 
3214     for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3215 
3216         /* If either of the filename paths is actually a protocol, then
3217          * compare unmodified paths; otherwise make paths relative */
3218         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3219             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3220                 retval = curr_bs->backing->bs;
3221                 break;
3222             }
3223         } else {
3224             /* If not an absolute filename path, make it relative to the current
3225              * image's filename path */
3226             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3227                          backing_file);
3228 
3229             /* We are going to compare absolute pathnames */
3230             if (!realpath(filename_tmp, filename_full)) {
3231                 continue;
3232             }
3233 
3234             /* We need to make sure the backing filename we are comparing against
3235              * is relative to the current image filename (or absolute) */
3236             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3237                          curr_bs->backing_file);
3238 
3239             if (!realpath(filename_tmp, backing_file_full)) {
3240                 continue;
3241             }
3242 
3243             if (strcmp(backing_file_full, filename_full) == 0) {
3244                 retval = curr_bs->backing->bs;
3245                 break;
3246             }
3247         }
3248     }
3249 
3250     g_free(filename_full);
3251     g_free(backing_file_full);
3252     g_free(filename_tmp);
3253     return retval;
3254 }
3255 
3256 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3257 {
3258     if (!bs->drv) {
3259         return 0;
3260     }
3261 
3262     if (!bs->backing) {
3263         return 0;
3264     }
3265 
3266     return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3267 }
3268 
3269 void bdrv_init(void)
3270 {
3271     module_call_init(MODULE_INIT_BLOCK);
3272 }
3273 
3274 void bdrv_init_with_whitelist(void)
3275 {
3276     use_bdrv_whitelist = 1;
3277     bdrv_init();
3278 }
3279 
3280 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3281 {
3282     Error *local_err = NULL;
3283     int ret;
3284 
3285     if (!bs->drv)  {
3286         return;
3287     }
3288 
3289     if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3290         return;
3291     }
3292     bs->open_flags &= ~BDRV_O_INACTIVE;
3293 
3294     if (bs->drv->bdrv_invalidate_cache) {
3295         bs->drv->bdrv_invalidate_cache(bs, &local_err);
3296     } else if (bs->file) {
3297         bdrv_invalidate_cache(bs->file->bs, &local_err);
3298     }
3299     if (local_err) {
3300         bs->open_flags |= BDRV_O_INACTIVE;
3301         error_propagate(errp, local_err);
3302         return;
3303     }
3304 
3305     ret = refresh_total_sectors(bs, bs->total_sectors);
3306     if (ret < 0) {
3307         bs->open_flags |= BDRV_O_INACTIVE;
3308         error_setg_errno(errp, -ret, "Could not refresh total sector count");
3309         return;
3310     }
3311 }
3312 
3313 void bdrv_invalidate_cache_all(Error **errp)
3314 {
3315     BlockDriverState *bs;
3316     Error *local_err = NULL;
3317 
3318     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3319         AioContext *aio_context = bdrv_get_aio_context(bs);
3320 
3321         aio_context_acquire(aio_context);
3322         bdrv_invalidate_cache(bs, &local_err);
3323         aio_context_release(aio_context);
3324         if (local_err) {
3325             error_propagate(errp, local_err);
3326             return;
3327         }
3328     }
3329 }
3330 
3331 static int bdrv_inactivate(BlockDriverState *bs)
3332 {
3333     int ret;
3334 
3335     if (bs->drv->bdrv_inactivate) {
3336         ret = bs->drv->bdrv_inactivate(bs);
3337         if (ret < 0) {
3338             return ret;
3339         }
3340     }
3341 
3342     bs->open_flags |= BDRV_O_INACTIVE;
3343     return 0;
3344 }
3345 
3346 int bdrv_inactivate_all(void)
3347 {
3348     BlockDriverState *bs;
3349     int ret;
3350 
3351     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3352         AioContext *aio_context = bdrv_get_aio_context(bs);
3353 
3354         aio_context_acquire(aio_context);
3355         ret = bdrv_inactivate(bs);
3356         aio_context_release(aio_context);
3357         if (ret < 0) {
3358             return ret;
3359         }
3360     }
3361 
3362     return 0;
3363 }
3364 
3365 /**************************************************************/
3366 /* removable device support */
3367 
3368 /**
3369  * Return TRUE if the media is present
3370  */
3371 bool bdrv_is_inserted(BlockDriverState *bs)
3372 {
3373     BlockDriver *drv = bs->drv;
3374     BdrvChild *child;
3375 
3376     if (!drv) {
3377         return false;
3378     }
3379     if (drv->bdrv_is_inserted) {
3380         return drv->bdrv_is_inserted(bs);
3381     }
3382     QLIST_FOREACH(child, &bs->children, next) {
3383         if (!bdrv_is_inserted(child->bs)) {
3384             return false;
3385         }
3386     }
3387     return true;
3388 }
3389 
3390 /**
3391  * Return whether the media changed since the last call to this
3392  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3393  */
3394 int bdrv_media_changed(BlockDriverState *bs)
3395 {
3396     BlockDriver *drv = bs->drv;
3397 
3398     if (drv && drv->bdrv_media_changed) {
3399         return drv->bdrv_media_changed(bs);
3400     }
3401     return -ENOTSUP;
3402 }
3403 
3404 /**
3405  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3406  */
3407 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3408 {
3409     BlockDriver *drv = bs->drv;
3410     const char *device_name;
3411 
3412     if (drv && drv->bdrv_eject) {
3413         drv->bdrv_eject(bs, eject_flag);
3414     }
3415 
3416     device_name = bdrv_get_device_name(bs);
3417     if (device_name[0] != '\0') {
3418         qapi_event_send_device_tray_moved(device_name,
3419                                           eject_flag, &error_abort);
3420     }
3421 }
3422 
3423 /**
3424  * Lock or unlock the media (if it is locked, the user won't be able
3425  * to eject it manually).
3426  */
3427 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3428 {
3429     BlockDriver *drv = bs->drv;
3430 
3431     trace_bdrv_lock_medium(bs, locked);
3432 
3433     if (drv && drv->bdrv_lock_medium) {
3434         drv->bdrv_lock_medium(bs, locked);
3435     }
3436 }
3437 
3438 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3439 {
3440     BdrvDirtyBitmap *bm;
3441 
3442     assert(name);
3443     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3444         if (bm->name && !strcmp(name, bm->name)) {
3445             return bm;
3446         }
3447     }
3448     return NULL;
3449 }
3450 
3451 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3452 {
3453     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3454     g_free(bitmap->name);
3455     bitmap->name = NULL;
3456 }
3457 
3458 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3459                                           uint32_t granularity,
3460                                           const char *name,
3461                                           Error **errp)
3462 {
3463     int64_t bitmap_size;
3464     BdrvDirtyBitmap *bitmap;
3465     uint32_t sector_granularity;
3466 
3467     assert((granularity & (granularity - 1)) == 0);
3468 
3469     if (name && bdrv_find_dirty_bitmap(bs, name)) {
3470         error_setg(errp, "Bitmap already exists: %s", name);
3471         return NULL;
3472     }
3473     sector_granularity = granularity >> BDRV_SECTOR_BITS;
3474     assert(sector_granularity);
3475     bitmap_size = bdrv_nb_sectors(bs);
3476     if (bitmap_size < 0) {
3477         error_setg_errno(errp, -bitmap_size, "could not get length of device");
3478         errno = -bitmap_size;
3479         return NULL;
3480     }
3481     bitmap = g_new0(BdrvDirtyBitmap, 1);
3482     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3483     bitmap->size = bitmap_size;
3484     bitmap->name = g_strdup(name);
3485     bitmap->disabled = false;
3486     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3487     return bitmap;
3488 }
3489 
3490 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3491 {
3492     return bitmap->successor;
3493 }
3494 
3495 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3496 {
3497     return !(bitmap->disabled || bitmap->successor);
3498 }
3499 
3500 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3501 {
3502     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3503         return DIRTY_BITMAP_STATUS_FROZEN;
3504     } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3505         return DIRTY_BITMAP_STATUS_DISABLED;
3506     } else {
3507         return DIRTY_BITMAP_STATUS_ACTIVE;
3508     }
3509 }
3510 
3511 /**
3512  * Create a successor bitmap destined to replace this bitmap after an operation.
3513  * Requires that the bitmap is not frozen and has no successor.
3514  */
3515 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3516                                        BdrvDirtyBitmap *bitmap, Error **errp)
3517 {
3518     uint64_t granularity;
3519     BdrvDirtyBitmap *child;
3520 
3521     if (bdrv_dirty_bitmap_frozen(bitmap)) {
3522         error_setg(errp, "Cannot create a successor for a bitmap that is "
3523                    "currently frozen");
3524         return -1;
3525     }
3526     assert(!bitmap->successor);
3527 
3528     /* Create an anonymous successor */
3529     granularity = bdrv_dirty_bitmap_granularity(bitmap);
3530     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3531     if (!child) {
3532         return -1;
3533     }
3534 
3535     /* Successor will be on or off based on our current state. */
3536     child->disabled = bitmap->disabled;
3537 
3538     /* Install the successor and freeze the parent */
3539     bitmap->successor = child;
3540     return 0;
3541 }
3542 
3543 /**
3544  * For a bitmap with a successor, yield our name to the successor,
3545  * delete the old bitmap, and return a handle to the new bitmap.
3546  */
3547 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3548                                             BdrvDirtyBitmap *bitmap,
3549                                             Error **errp)
3550 {
3551     char *name;
3552     BdrvDirtyBitmap *successor = bitmap->successor;
3553 
3554     if (successor == NULL) {
3555         error_setg(errp, "Cannot relinquish control if "
3556                    "there's no successor present");
3557         return NULL;
3558     }
3559 
3560     name = bitmap->name;
3561     bitmap->name = NULL;
3562     successor->name = name;
3563     bitmap->successor = NULL;
3564     bdrv_release_dirty_bitmap(bs, bitmap);
3565 
3566     return successor;
3567 }
3568 
3569 /**
3570  * In cases of failure where we can no longer safely delete the parent,
3571  * we may wish to re-join the parent and child/successor.
3572  * The merged parent will be un-frozen, but not explicitly re-enabled.
3573  */
3574 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3575                                            BdrvDirtyBitmap *parent,
3576                                            Error **errp)
3577 {
3578     BdrvDirtyBitmap *successor = parent->successor;
3579 
3580     if (!successor) {
3581         error_setg(errp, "Cannot reclaim a successor when none is present");
3582         return NULL;
3583     }
3584 
3585     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3586         error_setg(errp, "Merging of parent and successor bitmap failed");
3587         return NULL;
3588     }
3589     bdrv_release_dirty_bitmap(bs, successor);
3590     parent->successor = NULL;
3591 
3592     return parent;
3593 }
3594 
3595 /**
3596  * Truncates _all_ bitmaps attached to a BDS.
3597  */
3598 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3599 {
3600     BdrvDirtyBitmap *bitmap;
3601     uint64_t size = bdrv_nb_sectors(bs);
3602 
3603     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3604         assert(!bdrv_dirty_bitmap_frozen(bitmap));
3605         hbitmap_truncate(bitmap->bitmap, size);
3606         bitmap->size = size;
3607     }
3608 }
3609 
3610 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
3611                                                   BdrvDirtyBitmap *bitmap,
3612                                                   bool only_named)
3613 {
3614     BdrvDirtyBitmap *bm, *next;
3615     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3616         if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
3617             assert(!bdrv_dirty_bitmap_frozen(bm));
3618             QLIST_REMOVE(bm, list);
3619             hbitmap_free(bm->bitmap);
3620             g_free(bm->name);
3621             g_free(bm);
3622 
3623             if (bitmap) {
3624                 return;
3625             }
3626         }
3627     }
3628 }
3629 
3630 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3631 {
3632     bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
3633 }
3634 
3635 /**
3636  * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
3637  * There must not be any frozen bitmaps attached.
3638  */
3639 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
3640 {
3641     bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
3642 }
3643 
3644 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3645 {
3646     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3647     bitmap->disabled = true;
3648 }
3649 
3650 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3651 {
3652     assert(!bdrv_dirty_bitmap_frozen(bitmap));
3653     bitmap->disabled = false;
3654 }
3655 
3656 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3657 {
3658     BdrvDirtyBitmap *bm;
3659     BlockDirtyInfoList *list = NULL;
3660     BlockDirtyInfoList **plist = &list;
3661 
3662     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3663         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3664         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3665         info->count = bdrv_get_dirty_count(bm);
3666         info->granularity = bdrv_dirty_bitmap_granularity(bm);
3667         info->has_name = !!bm->name;
3668         info->name = g_strdup(bm->name);
3669         info->status = bdrv_dirty_bitmap_status(bm);
3670         entry->value = info;
3671         *plist = entry;
3672         plist = &entry->next;
3673     }
3674 
3675     return list;
3676 }
3677 
3678 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3679 {
3680     if (bitmap) {
3681         return hbitmap_get(bitmap->bitmap, sector);
3682     } else {
3683         return 0;
3684     }
3685 }
3686 
3687 /**
3688  * Chooses a default granularity based on the existing cluster size,
3689  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3690  * is no cluster size information available.
3691  */
3692 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3693 {
3694     BlockDriverInfo bdi;
3695     uint32_t granularity;
3696 
3697     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3698         granularity = MAX(4096, bdi.cluster_size);
3699         granularity = MIN(65536, granularity);
3700     } else {
3701         granularity = 65536;
3702     }
3703 
3704     return granularity;
3705 }
3706 
3707 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3708 {
3709     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3710 }
3711 
3712 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3713 {
3714     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3715 }
3716 
3717 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3718                            int64_t cur_sector, int nr_sectors)
3719 {
3720     assert(bdrv_dirty_bitmap_enabled(bitmap));
3721     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3722 }
3723 
3724 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3725                              int64_t cur_sector, int nr_sectors)
3726 {
3727     assert(bdrv_dirty_bitmap_enabled(bitmap));
3728     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3729 }
3730 
3731 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3732 {
3733     assert(bdrv_dirty_bitmap_enabled(bitmap));
3734     if (!out) {
3735         hbitmap_reset_all(bitmap->bitmap);
3736     } else {
3737         HBitmap *backup = bitmap->bitmap;
3738         bitmap->bitmap = hbitmap_alloc(bitmap->size,
3739                                        hbitmap_granularity(backup));
3740         *out = backup;
3741     }
3742 }
3743 
3744 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3745 {
3746     HBitmap *tmp = bitmap->bitmap;
3747     assert(bdrv_dirty_bitmap_enabled(bitmap));
3748     bitmap->bitmap = in;
3749     hbitmap_free(tmp);
3750 }
3751 
3752 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3753                     int nr_sectors)
3754 {
3755     BdrvDirtyBitmap *bitmap;
3756     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3757         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3758             continue;
3759         }
3760         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3761     }
3762 }
3763 
3764 /**
3765  * Advance an HBitmapIter to an arbitrary offset.
3766  */
3767 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3768 {
3769     assert(hbi->hb);
3770     hbitmap_iter_init(hbi, hbi->hb, offset);
3771 }
3772 
3773 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3774 {
3775     return hbitmap_count(bitmap->bitmap);
3776 }
3777 
3778 /* Get a reference to bs */
3779 void bdrv_ref(BlockDriverState *bs)
3780 {
3781     bs->refcnt++;
3782 }
3783 
3784 /* Release a previously grabbed reference to bs.
3785  * If after releasing, reference count is zero, the BlockDriverState is
3786  * deleted. */
3787 void bdrv_unref(BlockDriverState *bs)
3788 {
3789     if (!bs) {
3790         return;
3791     }
3792     assert(bs->refcnt > 0);
3793     if (--bs->refcnt == 0) {
3794         bdrv_delete(bs);
3795     }
3796 }
3797 
3798 struct BdrvOpBlocker {
3799     Error *reason;
3800     QLIST_ENTRY(BdrvOpBlocker) list;
3801 };
3802 
3803 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3804 {
3805     BdrvOpBlocker *blocker;
3806     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3807     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3808         blocker = QLIST_FIRST(&bs->op_blockers[op]);
3809         if (errp) {
3810             *errp = error_copy(blocker->reason);
3811             error_prepend(errp, "Node '%s' is busy: ",
3812                           bdrv_get_device_or_node_name(bs));
3813         }
3814         return true;
3815     }
3816     return false;
3817 }
3818 
3819 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3820 {
3821     BdrvOpBlocker *blocker;
3822     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3823 
3824     blocker = g_new0(BdrvOpBlocker, 1);
3825     blocker->reason = reason;
3826     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3827 }
3828 
3829 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3830 {
3831     BdrvOpBlocker *blocker, *next;
3832     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3833     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3834         if (blocker->reason == reason) {
3835             QLIST_REMOVE(blocker, list);
3836             g_free(blocker);
3837         }
3838     }
3839 }
3840 
3841 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3842 {
3843     int i;
3844     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3845         bdrv_op_block(bs, i, reason);
3846     }
3847 }
3848 
3849 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3850 {
3851     int i;
3852     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3853         bdrv_op_unblock(bs, i, reason);
3854     }
3855 }
3856 
3857 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3858 {
3859     int i;
3860 
3861     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3862         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3863             return false;
3864         }
3865     }
3866     return true;
3867 }
3868 
3869 void bdrv_img_create(const char *filename, const char *fmt,
3870                      const char *base_filename, const char *base_fmt,
3871                      char *options, uint64_t img_size, int flags,
3872                      Error **errp, bool quiet)
3873 {
3874     QemuOptsList *create_opts = NULL;
3875     QemuOpts *opts = NULL;
3876     const char *backing_fmt, *backing_file;
3877     int64_t size;
3878     BlockDriver *drv, *proto_drv;
3879     Error *local_err = NULL;
3880     int ret = 0;
3881 
3882     /* Find driver and parse its options */
3883     drv = bdrv_find_format(fmt);
3884     if (!drv) {
3885         error_setg(errp, "Unknown file format '%s'", fmt);
3886         return;
3887     }
3888 
3889     proto_drv = bdrv_find_protocol(filename, true, errp);
3890     if (!proto_drv) {
3891         return;
3892     }
3893 
3894     if (!drv->create_opts) {
3895         error_setg(errp, "Format driver '%s' does not support image creation",
3896                    drv->format_name);
3897         return;
3898     }
3899 
3900     if (!proto_drv->create_opts) {
3901         error_setg(errp, "Protocol driver '%s' does not support image creation",
3902                    proto_drv->format_name);
3903         return;
3904     }
3905 
3906     create_opts = qemu_opts_append(create_opts, drv->create_opts);
3907     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3908 
3909     /* Create parameter list with default values */
3910     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3911     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3912 
3913     /* Parse -o options */
3914     if (options) {
3915         qemu_opts_do_parse(opts, options, NULL, &local_err);
3916         if (local_err) {
3917             error_report_err(local_err);
3918             local_err = NULL;
3919             error_setg(errp, "Invalid options for file format '%s'", fmt);
3920             goto out;
3921         }
3922     }
3923 
3924     if (base_filename) {
3925         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3926         if (local_err) {
3927             error_setg(errp, "Backing file not supported for file format '%s'",
3928                        fmt);
3929             goto out;
3930         }
3931     }
3932 
3933     if (base_fmt) {
3934         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3935         if (local_err) {
3936             error_setg(errp, "Backing file format not supported for file "
3937                              "format '%s'", fmt);
3938             goto out;
3939         }
3940     }
3941 
3942     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3943     if (backing_file) {
3944         if (!strcmp(filename, backing_file)) {
3945             error_setg(errp, "Error: Trying to create an image with the "
3946                              "same filename as the backing file");
3947             goto out;
3948         }
3949     }
3950 
3951     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3952 
3953     // The size for the image must always be specified, with one exception:
3954     // If we are using a backing file, we can obtain the size from there
3955     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3956     if (size == -1) {
3957         if (backing_file) {
3958             BlockDriverState *bs;
3959             char *full_backing = g_new0(char, PATH_MAX);
3960             int64_t size;
3961             int back_flags;
3962             QDict *backing_options = NULL;
3963 
3964             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3965                                                          full_backing, PATH_MAX,
3966                                                          &local_err);
3967             if (local_err) {
3968                 g_free(full_backing);
3969                 goto out;
3970             }
3971 
3972             /* backing files always opened read-only */
3973             back_flags =
3974                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3975 
3976             if (backing_fmt) {
3977                 backing_options = qdict_new();
3978                 qdict_put(backing_options, "driver",
3979                           qstring_from_str(backing_fmt));
3980             }
3981 
3982             bs = NULL;
3983             ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3984                             back_flags, &local_err);
3985             g_free(full_backing);
3986             if (ret < 0) {
3987                 goto out;
3988             }
3989             size = bdrv_getlength(bs);
3990             if (size < 0) {
3991                 error_setg_errno(errp, -size, "Could not get size of '%s'",
3992                                  backing_file);
3993                 bdrv_unref(bs);
3994                 goto out;
3995             }
3996 
3997             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3998 
3999             bdrv_unref(bs);
4000         } else {
4001             error_setg(errp, "Image creation needs a size parameter");
4002             goto out;
4003         }
4004     }
4005 
4006     if (!quiet) {
4007         printf("Formatting '%s', fmt=%s ", filename, fmt);
4008         qemu_opts_print(opts, " ");
4009         puts("");
4010     }
4011 
4012     ret = bdrv_create(drv, filename, opts, &local_err);
4013 
4014     if (ret == -EFBIG) {
4015         /* This is generally a better message than whatever the driver would
4016          * deliver (especially because of the cluster_size_hint), since that
4017          * is most probably not much different from "image too large". */
4018         const char *cluster_size_hint = "";
4019         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
4020             cluster_size_hint = " (try using a larger cluster size)";
4021         }
4022         error_setg(errp, "The image size is too large for file format '%s'"
4023                    "%s", fmt, cluster_size_hint);
4024         error_free(local_err);
4025         local_err = NULL;
4026     }
4027 
4028 out:
4029     qemu_opts_del(opts);
4030     qemu_opts_free(create_opts);
4031     if (local_err) {
4032         error_propagate(errp, local_err);
4033     }
4034 }
4035 
4036 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4037 {
4038     return bs->aio_context;
4039 }
4040 
4041 void bdrv_detach_aio_context(BlockDriverState *bs)
4042 {
4043     BdrvAioNotifier *baf;
4044 
4045     if (!bs->drv) {
4046         return;
4047     }
4048 
4049     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
4050         baf->detach_aio_context(baf->opaque);
4051     }
4052 
4053     if (bs->throttle_state) {
4054         throttle_timers_detach_aio_context(&bs->throttle_timers);
4055     }
4056     if (bs->drv->bdrv_detach_aio_context) {
4057         bs->drv->bdrv_detach_aio_context(bs);
4058     }
4059     if (bs->file) {
4060         bdrv_detach_aio_context(bs->file->bs);
4061     }
4062     if (bs->backing) {
4063         bdrv_detach_aio_context(bs->backing->bs);
4064     }
4065 
4066     bs->aio_context = NULL;
4067 }
4068 
4069 void bdrv_attach_aio_context(BlockDriverState *bs,
4070                              AioContext *new_context)
4071 {
4072     BdrvAioNotifier *ban;
4073 
4074     if (!bs->drv) {
4075         return;
4076     }
4077 
4078     bs->aio_context = new_context;
4079 
4080     if (bs->backing) {
4081         bdrv_attach_aio_context(bs->backing->bs, new_context);
4082     }
4083     if (bs->file) {
4084         bdrv_attach_aio_context(bs->file->bs, new_context);
4085     }
4086     if (bs->drv->bdrv_attach_aio_context) {
4087         bs->drv->bdrv_attach_aio_context(bs, new_context);
4088     }
4089     if (bs->throttle_state) {
4090         throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
4091     }
4092 
4093     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
4094         ban->attached_aio_context(new_context, ban->opaque);
4095     }
4096 }
4097 
4098 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
4099 {
4100     bdrv_drain(bs); /* ensure there are no in-flight requests */
4101 
4102     bdrv_detach_aio_context(bs);
4103 
4104     /* This function executes in the old AioContext so acquire the new one in
4105      * case it runs in a different thread.
4106      */
4107     aio_context_acquire(new_context);
4108     bdrv_attach_aio_context(bs, new_context);
4109     aio_context_release(new_context);
4110 }
4111 
4112 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4113         void (*attached_aio_context)(AioContext *new_context, void *opaque),
4114         void (*detach_aio_context)(void *opaque), void *opaque)
4115 {
4116     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4117     *ban = (BdrvAioNotifier){
4118         .attached_aio_context = attached_aio_context,
4119         .detach_aio_context   = detach_aio_context,
4120         .opaque               = opaque
4121     };
4122 
4123     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4124 }
4125 
4126 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4127                                       void (*attached_aio_context)(AioContext *,
4128                                                                    void *),
4129                                       void (*detach_aio_context)(void *),
4130                                       void *opaque)
4131 {
4132     BdrvAioNotifier *ban, *ban_next;
4133 
4134     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4135         if (ban->attached_aio_context == attached_aio_context &&
4136             ban->detach_aio_context   == detach_aio_context   &&
4137             ban->opaque               == opaque)
4138         {
4139             QLIST_REMOVE(ban, list);
4140             g_free(ban);
4141 
4142             return;
4143         }
4144     }
4145 
4146     abort();
4147 }
4148 
4149 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4150                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4151 {
4152     if (!bs->drv->bdrv_amend_options) {
4153         return -ENOTSUP;
4154     }
4155     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4156 }
4157 
4158 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4159  * of block filter and by bdrv_is_first_non_filter.
4160  * It is used to test if the given bs is the candidate or recurse more in the
4161  * node graph.
4162  */
4163 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4164                                       BlockDriverState *candidate)
4165 {
4166     /* return false if basic checks fails */
4167     if (!bs || !bs->drv) {
4168         return false;
4169     }
4170 
4171     /* the code reached a non block filter driver -> check if the bs is
4172      * the same as the candidate. It's the recursion termination condition.
4173      */
4174     if (!bs->drv->is_filter) {
4175         return bs == candidate;
4176     }
4177     /* Down this path the driver is a block filter driver */
4178 
4179     /* If the block filter recursion method is defined use it to recurse down
4180      * the node graph.
4181      */
4182     if (bs->drv->bdrv_recurse_is_first_non_filter) {
4183         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4184     }
4185 
4186     /* the driver is a block filter but don't allow to recurse -> return false
4187      */
4188     return false;
4189 }
4190 
4191 /* This function checks if the candidate is the first non filter bs down it's
4192  * bs chain. Since we don't have pointers to parents it explore all bs chains
4193  * from the top. Some filters can choose not to pass down the recursion.
4194  */
4195 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4196 {
4197     BlockDriverState *bs;
4198 
4199     /* walk down the bs forest recursively */
4200     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4201         bool perm;
4202 
4203         /* try to recurse in this top level bs */
4204         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4205 
4206         /* candidate is the first non filter */
4207         if (perm) {
4208             return true;
4209         }
4210     }
4211 
4212     return false;
4213 }
4214 
4215 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4216                                         const char *node_name, Error **errp)
4217 {
4218     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4219     AioContext *aio_context;
4220 
4221     if (!to_replace_bs) {
4222         error_setg(errp, "Node name '%s' not found", node_name);
4223         return NULL;
4224     }
4225 
4226     aio_context = bdrv_get_aio_context(to_replace_bs);
4227     aio_context_acquire(aio_context);
4228 
4229     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4230         to_replace_bs = NULL;
4231         goto out;
4232     }
4233 
4234     /* We don't want arbitrary node of the BDS chain to be replaced only the top
4235      * most non filter in order to prevent data corruption.
4236      * Another benefit is that this tests exclude backing files which are
4237      * blocked by the backing blockers.
4238      */
4239     if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4240         error_setg(errp, "Only top most non filter can be replaced");
4241         to_replace_bs = NULL;
4242         goto out;
4243     }
4244 
4245 out:
4246     aio_context_release(aio_context);
4247     return to_replace_bs;
4248 }
4249 
4250 static bool append_open_options(QDict *d, BlockDriverState *bs)
4251 {
4252     const QDictEntry *entry;
4253     QemuOptDesc *desc;
4254     BdrvChild *child;
4255     bool found_any = false;
4256     const char *p;
4257 
4258     for (entry = qdict_first(bs->options); entry;
4259          entry = qdict_next(bs->options, entry))
4260     {
4261         /* Exclude options for children */
4262         QLIST_FOREACH(child, &bs->children, next) {
4263             if (strstart(qdict_entry_key(entry), child->name, &p)
4264                 && (!*p || *p == '.'))
4265             {
4266                 break;
4267             }
4268         }
4269         if (child) {
4270             continue;
4271         }
4272 
4273         /* And exclude all non-driver-specific options */
4274         for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4275             if (!strcmp(qdict_entry_key(entry), desc->name)) {
4276                 break;
4277             }
4278         }
4279         if (desc->name) {
4280             continue;
4281         }
4282 
4283         qobject_incref(qdict_entry_value(entry));
4284         qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4285         found_any = true;
4286     }
4287 
4288     return found_any;
4289 }
4290 
4291 /* Updates the following BDS fields:
4292  *  - exact_filename: A filename which may be used for opening a block device
4293  *                    which (mostly) equals the given BDS (even without any
4294  *                    other options; so reading and writing must return the same
4295  *                    results, but caching etc. may be different)
4296  *  - full_open_options: Options which, when given when opening a block device
4297  *                       (without a filename), result in a BDS (mostly)
4298  *                       equalling the given one
4299  *  - filename: If exact_filename is set, it is copied here. Otherwise,
4300  *              full_open_options is converted to a JSON object, prefixed with
4301  *              "json:" (for use through the JSON pseudo protocol) and put here.
4302  */
4303 void bdrv_refresh_filename(BlockDriverState *bs)
4304 {
4305     BlockDriver *drv = bs->drv;
4306     QDict *opts;
4307 
4308     if (!drv) {
4309         return;
4310     }
4311 
4312     /* This BDS's file name will most probably depend on its file's name, so
4313      * refresh that first */
4314     if (bs->file) {
4315         bdrv_refresh_filename(bs->file->bs);
4316     }
4317 
4318     if (drv->bdrv_refresh_filename) {
4319         /* Obsolete information is of no use here, so drop the old file name
4320          * information before refreshing it */
4321         bs->exact_filename[0] = '\0';
4322         if (bs->full_open_options) {
4323             QDECREF(bs->full_open_options);
4324             bs->full_open_options = NULL;
4325         }
4326 
4327         opts = qdict_new();
4328         append_open_options(opts, bs);
4329         drv->bdrv_refresh_filename(bs, opts);
4330         QDECREF(opts);
4331     } else if (bs->file) {
4332         /* Try to reconstruct valid information from the underlying file */
4333         bool has_open_options;
4334 
4335         bs->exact_filename[0] = '\0';
4336         if (bs->full_open_options) {
4337             QDECREF(bs->full_open_options);
4338             bs->full_open_options = NULL;
4339         }
4340 
4341         opts = qdict_new();
4342         has_open_options = append_open_options(opts, bs);
4343 
4344         /* If no specific options have been given for this BDS, the filename of
4345          * the underlying file should suffice for this one as well */
4346         if (bs->file->bs->exact_filename[0] && !has_open_options) {
4347             strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4348         }
4349         /* Reconstructing the full options QDict is simple for most format block
4350          * drivers, as long as the full options are known for the underlying
4351          * file BDS. The full options QDict of that file BDS should somehow
4352          * contain a representation of the filename, therefore the following
4353          * suffices without querying the (exact_)filename of this BDS. */
4354         if (bs->file->bs->full_open_options) {
4355             qdict_put_obj(opts, "driver",
4356                           QOBJECT(qstring_from_str(drv->format_name)));
4357             QINCREF(bs->file->bs->full_open_options);
4358             qdict_put_obj(opts, "file",
4359                           QOBJECT(bs->file->bs->full_open_options));
4360 
4361             bs->full_open_options = opts;
4362         } else {
4363             QDECREF(opts);
4364         }
4365     } else if (!bs->full_open_options && qdict_size(bs->options)) {
4366         /* There is no underlying file BDS (at least referenced by BDS.file),
4367          * so the full options QDict should be equal to the options given
4368          * specifically for this block device when it was opened (plus the
4369          * driver specification).
4370          * Because those options don't change, there is no need to update
4371          * full_open_options when it's already set. */
4372 
4373         opts = qdict_new();
4374         append_open_options(opts, bs);
4375         qdict_put_obj(opts, "driver",
4376                       QOBJECT(qstring_from_str(drv->format_name)));
4377 
4378         if (bs->exact_filename[0]) {
4379             /* This may not work for all block protocol drivers (some may
4380              * require this filename to be parsed), but we have to find some
4381              * default solution here, so just include it. If some block driver
4382              * does not support pure options without any filename at all or
4383              * needs some special format of the options QDict, it needs to
4384              * implement the driver-specific bdrv_refresh_filename() function.
4385              */
4386             qdict_put_obj(opts, "filename",
4387                           QOBJECT(qstring_from_str(bs->exact_filename)));
4388         }
4389 
4390         bs->full_open_options = opts;
4391     }
4392 
4393     if (bs->exact_filename[0]) {
4394         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4395     } else if (bs->full_open_options) {
4396         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4397         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4398                  qstring_get_str(json));
4399         QDECREF(json);
4400     }
4401 }
4402