xref: /qemu/qemu-img.c (revision 27a4a30e)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
52 #define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
53                           "\n" QEMU_COPYRIGHT "\n"
54 
/*
 * Pairs a command name with the function that handles it.  The handler
 * receives an argc/argv pair and returns an int status.
 */
struct img_cmd_t {
    const char *name;
    int (*handler)(int argc, char **argv);
};
typedef struct img_cmd_t img_cmd_t;
59 
/*
 * Codes returned by getopt_long() for long options that have no
 * single-letter form.  The first value is 256 (presumably so that the codes
 * cannot collide with single-character option codes); each following
 * enumerator is one greater than its predecessor.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
    OPTION_TARGET_IMAGE_OPTS,   /* 263 */
    OPTION_SIZE,                /* 264 */
    OPTION_PREALLOCATION,       /* 265 */
    OPTION_SHRINK,              /* 266 */
    OPTION_SALVAGE,             /* 267 */
    OPTION_TARGET_IS_ZERO,      /* 268 */
};
75 
/* Output style selected with --output: JSON or human-readable text. */
enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
};
typedef enum OutputFormat OutputFormat;
80 
81 /* Default to cache=writeback as data integrity is not important for qemu-img */
82 #define BDRV_DEFAULT_CACHE "writeback"
83 
/*
 * Callback handed to bdrv_iterate_format(): prints one format name preceded
 * by a single space.  The opaque pointer is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;

    printf(" %s", name);
}
88 
89 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
90 {
91     va_list ap;
92 
93     va_start(ap, fmt);
94     error_vreport(fmt, ap);
95     va_end(ap);
96 
97     error_printf("Try 'qemu-img --help' for more information\n");
98     exit(EXIT_FAILURE);
99 }
100 
101 static void QEMU_NORETURN missing_argument(const char *option)
102 {
103     error_exit("missing argument for option '%s'", option);
104 }
105 
106 static void QEMU_NORETURN unrecognized_option(const char *option)
107 {
108     error_exit("unrecognized option '%s'", option);
109 }
110 
111 /* Please keep in synch with qemu-img.texi */
112 static void QEMU_NORETURN help(void)
113 {
114     const char *help_msg =
115            QEMU_IMG_VERSION
116            "usage: qemu-img [standard options] command [command options]\n"
117            "QEMU disk image utility\n"
118            "\n"
119            "    '-h', '--help'       display this help and exit\n"
120            "    '-V', '--version'    output version information and exit\n"
121            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
122            "                         specify tracing options\n"
123            "\n"
124            "Command syntax:\n"
125 #define DEF(option, callback, arg_string)        \
126            "  " arg_string "\n"
127 #include "qemu-img-cmds.h"
128 #undef DEF
129            "\n"
130            "Command parameters:\n"
131            "  'filename' is a disk image filename\n"
132            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
133            "    manual page for a description of the object properties. The most common\n"
134            "    object type is a 'secret', which is used to supply passwords and/or\n"
135            "    encryption keys.\n"
136            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
137            "  'cache' is the cache mode used to write the output disk image, the valid\n"
138            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
139            "    'directsync' and 'unsafe' (default for convert)\n"
140            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
141            "    options are the same as for the 'cache' option\n"
142            "  'size' is the disk image size in bytes. Optional suffixes\n"
143            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
144            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
145            "    supported. 'b' is ignored.\n"
146            "  'output_filename' is the destination disk image filename\n"
147            "  'output_fmt' is the destination format\n"
148            "  'options' is a comma separated list of format specific options in a\n"
149            "    name=value format. Use -o ? for an overview of the options supported by the\n"
150            "    used format\n"
151            "  'snapshot_param' is param used for internal snapshot, format\n"
152            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
153            "    '[ID_OR_NAME]'\n"
154            "  '-c' indicates that target image must be compressed (qcow format only)\n"
155            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
156            "       new backing file match exactly. The image doesn't need a working\n"
157            "       backing file before rebasing in this case (useful for renaming the\n"
158            "       backing file). For image creation, allow creating without attempting\n"
159            "       to open the backing file.\n"
160            "  '-h' with or without a command shows this help and lists the supported formats\n"
161            "  '-p' show progress of command (only certain commands)\n"
162            "  '-q' use Quiet mode - do not print any output (except errors)\n"
163            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
164            "       contain only zeros for qemu-img to create a sparse image during\n"
165            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
166            "       unallocated or zero sectors, and the destination image will always be\n"
167            "       fully allocated\n"
168            "  '--output' takes the format in which the output must be done (human or json)\n"
169            "  '-n' skips the target volume creation (useful if the volume is created\n"
170            "       prior to running qemu-img)\n"
171            "\n"
172            "Parameters to check subcommand:\n"
173            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
174            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
175            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
176            "       hiding corruption that has already occurred.\n"
177            "\n"
178            "Parameters to convert subcommand:\n"
179            "  '-m' specifies how many coroutines work in parallel during the convert\n"
180            "       process (defaults to 8)\n"
181            "  '-W' allow to write to the target out of order rather than sequential\n"
182            "\n"
183            "Parameters to snapshot subcommand:\n"
184            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
185            "  '-a' applies a snapshot (revert disk to saved state)\n"
186            "  '-c' creates a snapshot\n"
187            "  '-d' deletes a snapshot\n"
188            "  '-l' lists all snapshots in the given image\n"
189            "\n"
190            "Parameters to compare subcommand:\n"
191            "  '-f' first image format\n"
192            "  '-F' second image format\n"
193            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
194            "\n"
195            "Parameters to dd subcommand:\n"
196            "  'bs=BYTES' read and write up to BYTES bytes at a time "
197            "(default: 512)\n"
198            "  'count=N' copy only N input blocks\n"
199            "  'if=FILE' read from FILE\n"
200            "  'of=FILE' write to FILE\n"
201            "  'skip=N' skip N bs-sized blocks at the start of input\n";
202 
203     printf("%s\nSupported formats:", help_msg);
204     bdrv_iterate_format(format_print, NULL, false);
205     printf("\n\n" QEMU_HELP_BOTTOM "\n");
206     exit(EXIT_SUCCESS);
207 }
208 
209 static QemuOptsList qemu_object_opts = {
210     .name = "object",
211     .implied_opt_name = "qom-type",
212     .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
213     .desc = {
214         { }
215     },
216 };
217 
218 static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
219 {
220     if (user_creatable_print_help(type, opts)) {
221         exit(0);
222     }
223     return true;
224 }
225 
226 static QemuOptsList qemu_source_opts = {
227     .name = "source",
228     .implied_opt_name = "file",
229     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
230     .desc = {
231         { }
232     },
233 };
234 
/*
 * printf() wrapper that honours quiet mode.
 *
 * Returns 0 without printing anything when @quiet is set; otherwise
 * formats to stdout and returns whatever vprintf() returned.
 */
static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
{
    va_list ap;
    int written;

    if (quiet) {
        return 0;
    }

    va_start(ap, fmt);
    written = vprintf(fmt, ap);
    va_end(ap);

    return written;
}
246 
247 
248 static int print_block_option_help(const char *filename, const char *fmt)
249 {
250     BlockDriver *drv, *proto_drv;
251     QemuOptsList *create_opts = NULL;
252     Error *local_err = NULL;
253 
254     /* Find driver and parse its options */
255     drv = bdrv_find_format(fmt);
256     if (!drv) {
257         error_report("Unknown file format '%s'", fmt);
258         return 1;
259     }
260 
261     if (!drv->create_opts) {
262         error_report("Format driver '%s' does not support image creation", fmt);
263         return 1;
264     }
265 
266     create_opts = qemu_opts_append(create_opts, drv->create_opts);
267     if (filename) {
268         proto_drv = bdrv_find_protocol(filename, true, &local_err);
269         if (!proto_drv) {
270             error_report_err(local_err);
271             qemu_opts_free(create_opts);
272             return 1;
273         }
274         if (!proto_drv->create_opts) {
275             error_report("Protocol driver '%s' does not support image creation",
276                          proto_drv->format_name);
277             qemu_opts_free(create_opts);
278             return 1;
279         }
280         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
281     }
282 
283     if (filename) {
284         printf("Supported options:\n");
285     } else {
286         printf("Supported %s options:\n", fmt);
287     }
288     qemu_opts_print_help(create_opts, false);
289     qemu_opts_free(create_opts);
290 
291     if (!filename) {
292         printf("\n"
293                "The protocol level may support further options.\n"
294                "Specify the target filename to include those options.\n");
295     }
296 
297     return 0;
298 }
299 
300 
301 static BlockBackend *img_open_opts(const char *optstr,
302                                    QemuOpts *opts, int flags, bool writethrough,
303                                    bool quiet, bool force_share)
304 {
305     QDict *options;
306     Error *local_err = NULL;
307     BlockBackend *blk;
308     options = qemu_opts_to_qdict(opts, NULL);
309     if (force_share) {
310         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
311             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
312             error_report("--force-share/-U conflicts with image options");
313             qobject_unref(options);
314             return NULL;
315         }
316         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
317     }
318     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
319     if (!blk) {
320         error_reportf_err(local_err, "Could not open '%s': ", optstr);
321         return NULL;
322     }
323     blk_set_enable_write_cache(blk, !writethrough);
324 
325     return blk;
326 }
327 
328 static BlockBackend *img_open_file(const char *filename,
329                                    QDict *options,
330                                    const char *fmt, int flags,
331                                    bool writethrough, bool quiet,
332                                    bool force_share)
333 {
334     BlockBackend *blk;
335     Error *local_err = NULL;
336 
337     if (!options) {
338         options = qdict_new();
339     }
340     if (fmt) {
341         qdict_put_str(options, "driver", fmt);
342     }
343 
344     if (force_share) {
345         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
346     }
347     blk = blk_new_open(filename, NULL, options, flags, &local_err);
348     if (!blk) {
349         error_reportf_err(local_err, "Could not open '%s': ", filename);
350         return NULL;
351     }
352     blk_set_enable_write_cache(blk, !writethrough);
353 
354     return blk;
355 }
356 
357 
358 static int img_add_key_secrets(void *opaque,
359                                const char *name, const char *value,
360                                Error **errp)
361 {
362     QDict *options = opaque;
363 
364     if (g_str_has_suffix(name, "key-secret")) {
365         qdict_put_str(options, name, value);
366     }
367 
368     return 0;
369 }
370 
371 
372 static BlockBackend *img_open(bool image_opts,
373                               const char *filename,
374                               const char *fmt, int flags, bool writethrough,
375                               bool quiet, bool force_share)
376 {
377     BlockBackend *blk;
378     if (image_opts) {
379         QemuOpts *opts;
380         if (fmt) {
381             error_report("--image-opts and --format are mutually exclusive");
382             return NULL;
383         }
384         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
385                                        filename, true);
386         if (!opts) {
387             return NULL;
388         }
389         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
390                             force_share);
391     } else {
392         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
393                             force_share);
394     }
395     return blk;
396 }
397 
398 
399 static int add_old_style_options(const char *fmt, QemuOpts *opts,
400                                  const char *base_filename,
401                                  const char *base_fmt)
402 {
403     Error *err = NULL;
404 
405     if (base_filename) {
406         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
407         if (err) {
408             error_report("Backing file not supported for file format '%s'",
409                          fmt);
410             error_free(err);
411             return -1;
412         }
413     }
414     if (base_fmt) {
415         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
416         if (err) {
417             error_report("Backing file format not supported for file "
418                          "format '%s'", fmt);
419             error_free(err);
420             return -1;
421         }
422     }
423     return 0;
424 }
425 
/*
 * Parse a size string with qemu_strtosz().
 *
 * Returns the parsed value on success.  On failure returns a negative
 * value: the error code from qemu_strtosz(), or -ERANGE when the parsed
 * value does not fit into int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }

    return parsed > INT64_MAX ? -ERANGE : (int64_t)parsed;
}
440 
441 static int img_create(int argc, char **argv)
442 {
443     int c;
444     uint64_t img_size = -1;
445     const char *fmt = "raw";
446     const char *base_fmt = NULL;
447     const char *filename;
448     const char *base_filename = NULL;
449     char *options = NULL;
450     Error *local_err = NULL;
451     bool quiet = false;
452     int flags = 0;
453 
454     for(;;) {
455         static const struct option long_options[] = {
456             {"help", no_argument, 0, 'h'},
457             {"object", required_argument, 0, OPTION_OBJECT},
458             {0, 0, 0, 0}
459         };
460         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
461                         long_options, NULL);
462         if (c == -1) {
463             break;
464         }
465         switch(c) {
466         case ':':
467             missing_argument(argv[optind - 1]);
468             break;
469         case '?':
470             unrecognized_option(argv[optind - 1]);
471             break;
472         case 'h':
473             help();
474             break;
475         case 'F':
476             base_fmt = optarg;
477             break;
478         case 'b':
479             base_filename = optarg;
480             break;
481         case 'f':
482             fmt = optarg;
483             break;
484         case 'o':
485             if (!is_valid_option_list(optarg)) {
486                 error_report("Invalid option list: %s", optarg);
487                 goto fail;
488             }
489             if (!options) {
490                 options = g_strdup(optarg);
491             } else {
492                 char *old_options = options;
493                 options = g_strdup_printf("%s,%s", options, optarg);
494                 g_free(old_options);
495             }
496             break;
497         case 'q':
498             quiet = true;
499             break;
500         case 'u':
501             flags |= BDRV_O_NO_BACKING;
502             break;
503         case OPTION_OBJECT: {
504             QemuOpts *opts;
505             opts = qemu_opts_parse_noisily(&qemu_object_opts,
506                                            optarg, true);
507             if (!opts) {
508                 goto fail;
509             }
510         }   break;
511         }
512     }
513 
514     /* Get the filename */
515     filename = (optind < argc) ? argv[optind] : NULL;
516     if (options && has_help_option(options)) {
517         g_free(options);
518         return print_block_option_help(filename, fmt);
519     }
520 
521     if (optind >= argc) {
522         error_exit("Expecting image file name");
523     }
524     optind++;
525 
526     if (qemu_opts_foreach(&qemu_object_opts,
527                           user_creatable_add_opts_foreach,
528                           qemu_img_object_print_help, &error_fatal)) {
529         goto fail;
530     }
531 
532     /* Get image size, if specified */
533     if (optind < argc) {
534         int64_t sval;
535 
536         sval = cvtnum(argv[optind++]);
537         if (sval < 0) {
538             if (sval == -ERANGE) {
539                 error_report("Image size must be less than 8 EiB!");
540             } else {
541                 error_report("Invalid image size specified! You may use k, M, "
542                       "G, T, P or E suffixes for ");
543                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
544                              "petabytes and exabytes.");
545             }
546             goto fail;
547         }
548         img_size = (uint64_t)sval;
549     }
550     if (optind != argc) {
551         error_exit("Unexpected argument: %s", argv[optind]);
552     }
553 
554     bdrv_img_create(filename, fmt, base_filename, base_fmt,
555                     options, img_size, flags, quiet, &local_err);
556     if (local_err) {
557         error_reportf_err(local_err, "%s: ", filename);
558         goto fail;
559     }
560 
561     g_free(options);
562     return 0;
563 
564 fail:
565     g_free(options);
566     return 1;
567 }
568 
569 static void dump_json_image_check(ImageCheck *check, bool quiet)
570 {
571     QString *str;
572     QObject *obj;
573     Visitor *v = qobject_output_visitor_new(&obj);
574 
575     visit_type_ImageCheck(v, NULL, &check, &error_abort);
576     visit_complete(v, &obj);
577     str = qobject_to_json_pretty(obj);
578     assert(str != NULL);
579     qprintf(quiet, "%s\n", qstring_get_str(str));
580     qobject_unref(obj);
581     visit_free(v);
582     qobject_unref(str);
583 }
584 
585 static void dump_human_image_check(ImageCheck *check, bool quiet)
586 {
587     if (!(check->corruptions || check->leaks || check->check_errors)) {
588         qprintf(quiet, "No errors were found on the image.\n");
589     } else {
590         if (check->corruptions) {
591             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
592                     "Data may be corrupted, or further writes to the image "
593                     "may corrupt it.\n",
594                     check->corruptions);
595         }
596 
597         if (check->leaks) {
598             qprintf(quiet,
599                     "\n%" PRId64 " leaked clusters were found on the image.\n"
600                     "This means waste of disk space, but no harm to data.\n",
601                     check->leaks);
602         }
603 
604         if (check->check_errors) {
605             qprintf(quiet,
606                     "\n%" PRId64
607                     " internal errors have occurred during the check.\n",
608                     check->check_errors);
609         }
610     }
611 
612     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
613         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
614                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
615                 check->allocated_clusters, check->total_clusters,
616                 check->allocated_clusters * 100.0 / check->total_clusters,
617                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
618                 check->compressed_clusters * 100.0 /
619                 check->allocated_clusters);
620     }
621 
622     if (check->image_end_offset) {
623         qprintf(quiet,
624                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
625     }
626 }
627 
628 static int collect_image_check(BlockDriverState *bs,
629                    ImageCheck *check,
630                    const char *filename,
631                    const char *fmt,
632                    int fix)
633 {
634     int ret;
635     BdrvCheckResult result;
636 
637     ret = bdrv_check(bs, &result, fix);
638     if (ret < 0) {
639         return ret;
640     }
641 
642     check->filename                 = g_strdup(filename);
643     check->format                   = g_strdup(bdrv_get_format_name(bs));
644     check->check_errors             = result.check_errors;
645     check->corruptions              = result.corruptions;
646     check->has_corruptions          = result.corruptions != 0;
647     check->leaks                    = result.leaks;
648     check->has_leaks                = result.leaks != 0;
649     check->corruptions_fixed        = result.corruptions_fixed;
650     check->has_corruptions_fixed    = result.corruptions != 0;
651     check->leaks_fixed              = result.leaks_fixed;
652     check->has_leaks_fixed          = result.leaks != 0;
653     check->image_end_offset         = result.image_end_offset;
654     check->has_image_end_offset     = result.image_end_offset != 0;
655     check->total_clusters           = result.bfi.total_clusters;
656     check->has_total_clusters       = result.bfi.total_clusters != 0;
657     check->allocated_clusters       = result.bfi.allocated_clusters;
658     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
659     check->fragmented_clusters      = result.bfi.fragmented_clusters;
660     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
661     check->compressed_clusters      = result.bfi.compressed_clusters;
662     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
663 
664     return 0;
665 }
666 
667 /*
668  * Checks an image for consistency. Exit codes:
669  *
670  *  0 - Check completed, image is good
671  *  1 - Check not completed because of internal errors
672  *  2 - Check completed, image is corrupted
673  *  3 - Check completed, image has leaked clusters, but is good otherwise
674  * 63 - Checks are not supported by the image format
675  */
676 static int img_check(int argc, char **argv)
677 {
678     int c, ret;
679     OutputFormat output_format = OFORMAT_HUMAN;
680     const char *filename, *fmt, *output, *cache;
681     BlockBackend *blk;
682     BlockDriverState *bs;
683     int fix = 0;
684     int flags = BDRV_O_CHECK;
685     bool writethrough;
686     ImageCheck *check;
687     bool quiet = false;
688     bool image_opts = false;
689     bool force_share = false;
690 
691     fmt = NULL;
692     output = NULL;
693     cache = BDRV_DEFAULT_CACHE;
694 
695     for(;;) {
696         int option_index = 0;
697         static const struct option long_options[] = {
698             {"help", no_argument, 0, 'h'},
699             {"format", required_argument, 0, 'f'},
700             {"repair", required_argument, 0, 'r'},
701             {"output", required_argument, 0, OPTION_OUTPUT},
702             {"object", required_argument, 0, OPTION_OBJECT},
703             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
704             {"force-share", no_argument, 0, 'U'},
705             {0, 0, 0, 0}
706         };
707         c = getopt_long(argc, argv, ":hf:r:T:qU",
708                         long_options, &option_index);
709         if (c == -1) {
710             break;
711         }
712         switch(c) {
713         case ':':
714             missing_argument(argv[optind - 1]);
715             break;
716         case '?':
717             unrecognized_option(argv[optind - 1]);
718             break;
719         case 'h':
720             help();
721             break;
722         case 'f':
723             fmt = optarg;
724             break;
725         case 'r':
726             flags |= BDRV_O_RDWR;
727 
728             if (!strcmp(optarg, "leaks")) {
729                 fix = BDRV_FIX_LEAKS;
730             } else if (!strcmp(optarg, "all")) {
731                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
732             } else {
733                 error_exit("Unknown option value for -r "
734                            "(expecting 'leaks' or 'all'): %s", optarg);
735             }
736             break;
737         case OPTION_OUTPUT:
738             output = optarg;
739             break;
740         case 'T':
741             cache = optarg;
742             break;
743         case 'q':
744             quiet = true;
745             break;
746         case 'U':
747             force_share = true;
748             break;
749         case OPTION_OBJECT: {
750             QemuOpts *opts;
751             opts = qemu_opts_parse_noisily(&qemu_object_opts,
752                                            optarg, true);
753             if (!opts) {
754                 return 1;
755             }
756         }   break;
757         case OPTION_IMAGE_OPTS:
758             image_opts = true;
759             break;
760         }
761     }
762     if (optind != argc - 1) {
763         error_exit("Expecting one image file name");
764     }
765     filename = argv[optind++];
766 
767     if (output && !strcmp(output, "json")) {
768         output_format = OFORMAT_JSON;
769     } else if (output && !strcmp(output, "human")) {
770         output_format = OFORMAT_HUMAN;
771     } else if (output) {
772         error_report("--output must be used with human or json as argument.");
773         return 1;
774     }
775 
776     if (qemu_opts_foreach(&qemu_object_opts,
777                           user_creatable_add_opts_foreach,
778                           qemu_img_object_print_help, &error_fatal)) {
779         return 1;
780     }
781 
782     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
783     if (ret < 0) {
784         error_report("Invalid source cache option: %s", cache);
785         return 1;
786     }
787 
788     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
789                    force_share);
790     if (!blk) {
791         return 1;
792     }
793     bs = blk_bs(blk);
794 
795     check = g_new0(ImageCheck, 1);
796     ret = collect_image_check(bs, check, filename, fmt, fix);
797 
798     if (ret == -ENOTSUP) {
799         error_report("This image format does not support checks");
800         ret = 63;
801         goto fail;
802     }
803 
804     if (check->corruptions_fixed || check->leaks_fixed) {
805         int corruptions_fixed, leaks_fixed;
806 
807         leaks_fixed         = check->leaks_fixed;
808         corruptions_fixed   = check->corruptions_fixed;
809 
810         if (output_format == OFORMAT_HUMAN) {
811             qprintf(quiet,
812                     "The following inconsistencies were found and repaired:\n\n"
813                     "    %" PRId64 " leaked clusters\n"
814                     "    %" PRId64 " corruptions\n\n"
815                     "Double checking the fixed image now...\n",
816                     check->leaks_fixed,
817                     check->corruptions_fixed);
818         }
819 
820         qapi_free_ImageCheck(check);
821         check = g_new0(ImageCheck, 1);
822         ret = collect_image_check(bs, check, filename, fmt, 0);
823 
824         check->leaks_fixed          = leaks_fixed;
825         check->corruptions_fixed    = corruptions_fixed;
826     }
827 
828     if (!ret) {
829         switch (output_format) {
830         case OFORMAT_HUMAN:
831             dump_human_image_check(check, quiet);
832             break;
833         case OFORMAT_JSON:
834             dump_json_image_check(check, quiet);
835             break;
836         }
837     }
838 
839     if (ret || check->check_errors) {
840         if (ret) {
841             error_report("Check failed: %s", strerror(-ret));
842         } else {
843             error_report("Check failed");
844         }
845         ret = 1;
846         goto fail;
847     }
848 
849     if (check->corruptions) {
850         ret = 2;
851     } else if (check->leaks) {
852         ret = 3;
853     } else {
854         ret = 0;
855     }
856 
857 fail:
858     qapi_free_ImageCheck(check);
859     blk_unref(blk);
860     return ret;
861 }
862 
863 typedef struct CommonBlockJobCBInfo {
864     BlockDriverState *bs;
865     Error **errp;
866 } CommonBlockJobCBInfo;
867 
868 static void common_block_job_cb(void *opaque, int ret)
869 {
870     CommonBlockJobCBInfo *cbi = opaque;
871 
872     if (ret < 0) {
873         error_setg_errno(cbi->errp, -ret, "Block job failed");
874     }
875 }
876 
877 static void run_block_job(BlockJob *job, Error **errp)
878 {
879     AioContext *aio_context = blk_get_aio_context(job->blk);
880     int ret = 0;
881 
882     aio_context_acquire(aio_context);
883     job_ref(&job->job);
884     do {
885         float progress = 0.0f;
886         aio_poll(aio_context, true);
887         if (job->job.progress.total) {
888             progress = (float)job->job.progress.current /
889                        job->job.progress.total * 100.f;
890         }
891         qemu_progress_print(progress, 0);
892     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
893 
894     if (!job_is_completed(&job->job)) {
895         ret = job_complete_sync(&job->job, errp);
896     } else {
897         ret = job->job.ret;
898     }
899     job_unref(&job->job);
900     aio_context_release(aio_context);
901 
902     /* publish completion progress only when success */
903     if (!ret) {
904         qemu_progress_print(100.f, 0);
905     }
906 }
907 
/*
 * Implements the 'commit' subcommand: starts an active commit block job
 * named "commit" from the opened image into a backing image, runs it to
 * completion and, unless dropping was requested, empties the top image
 * afterwards if its driver provides bdrv_make_empty.
 *
 * Options parsed below: -f fmt, -t cache, -b base (implies -d), -d, -p, -q,
 * --object and --image-opts.  Exactly one image file name is expected.
 *
 * Returns 0 on success and 1 on failure.
 */
static int img_commit(int argc, char **argv)
{
    int c, ret, flags;
    const char *filename, *fmt, *cache, *base;
    BlockBackend *blk;
    BlockDriverState *bs, *base_bs;
    BlockJob *job;
    bool progress = false, quiet = false, drop = false;
    bool writethrough;
    Error *local_err = NULL;
    CommonBlockJobCBInfo cbi;
    bool image_opts = false;
    AioContext *aio_context;

    fmt = NULL;
    cache = BDRV_DEFAULT_CACHE;
    base = NULL;
    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {0, 0, 0, 0}
        };
        /* The leading ':' makes getopt return ':' for a missing argument */
        c = getopt_long(argc, argv, ":f:ht:b:dpq",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 't':
            cache = optarg;
            break;
        case 'b':
            base = optarg;
            /* -b implies -d */
            drop = true;
            break;
        case 'd':
            drop = true;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                return 1;
            }
        }   break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }

    if (optind != argc - 1) {
        error_exit("Expecting one image file name");
    }
    filename = argv[optind++];

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          qemu_img_object_print_help, &error_fatal)) {
        return 1;
    }

    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        return 1;
    }

    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
                   false);
    if (!blk) {
        return 1;
    }
    bs = blk_bs(blk);

    /* From here on, failures must leave through 'done' to end progress
     * reporting and drop the BlockBackend reference. */
    qemu_progress_init(progress, 1.f);
    qemu_progress_print(0.f, 100);

    if (base) {
        base_bs = bdrv_find_backing_image(bs, base);
        if (!base_bs) {
            error_setg(&local_err,
                       "Did not find '%s' in the backing chain of '%s'",
                       base, filename);
            goto done;
        }
    } else {
        /* This is different from QMP, which by default uses the deepest file in
         * the backing chain (i.e., the very base); however, the traditional
         * behavior of qemu-img commit is using the immediate backing file. */
        base_bs = backing_bs(bs);
        if (!base_bs) {
            error_setg(&local_err, "Image does not have a backing file");
            goto done;
        }
    }

    /* A job failure reported to common_block_job_cb() lands in local_err */
    cbi = (CommonBlockJobCBInfo){
        .errp = &local_err,
        .bs   = bs,
    };

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
                        &cbi, false, &local_err);
    aio_context_release(aio_context);
    if (local_err) {
        goto done;
    }

    /* When the block job completes, the BlockBackend reference will point to
     * the old backing file. In order to avoid that the top image is already
     * deleted, so we can still empty it afterwards, increment the reference
     * counter here preemptively. */
    if (!drop) {
        bdrv_ref(bs);
    }

    /* Look the job up by the ID it was started with above */
    job = block_job_get("commit");
    assert(job);
    run_block_job(job, &local_err);
    if (local_err) {
        goto unref_backing;
    }

    if (!drop && bs->drv->bdrv_make_empty) {
        ret = bs->drv->bdrv_make_empty(bs);
        if (ret) {
            error_setg_errno(&local_err, -ret, "Could not empty %s",
                             filename);
            goto unref_backing;
        }
    }

unref_backing:
    /* Drop the extra reference taken before running the job */
    if (!drop) {
        bdrv_unref(bs);
    }

done:
    qemu_progress_end();

    blk_unref(blk);

    if (local_err) {
        error_report_err(local_err);
        return 1;
    }

    qprintf(quiet, "Image committed.\n");
    return 0;
}
1090 
1091 /*
1092  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1093  * of the first sector boundary within buf where the sector contains a
1094  * non-zero byte.  This function is robust to a buffer that is not
1095  * sector-aligned.
1096  */
1097 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1098 {
1099     int64_t i;
1100     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1101 
1102     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1103         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1104             return i;
1105         }
1106     }
1107     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1108         return i;
1109     }
1110     return -1;
1111 }
1112 
1113 /*
1114  * Returns true iff the first sector pointed to by 'buf' contains at least
1115  * a non-NUL byte.
1116  *
1117  * 'pnum' is set to the number of sectors (including and immediately following
1118  * the first one) that are known to be in the same allocated/unallocated state.
1119  * The function will try to align the end offset to alignment boundaries so
1120  * that the request will at least end aligned and consequtive requests will
1121  * also start at an aligned offset.
1122  */
1123 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1124                                 int64_t sector_num, int alignment)
1125 {
1126     bool is_zero;
1127     int i, tail;
1128 
1129     if (n <= 0) {
1130         *pnum = 0;
1131         return 0;
1132     }
1133     is_zero = buffer_is_zero(buf, 512);
1134     for(i = 1; i < n; i++) {
1135         buf += 512;
1136         if (is_zero != buffer_is_zero(buf, 512)) {
1137             break;
1138         }
1139     }
1140 
1141     tail = (sector_num + i) & (alignment - 1);
1142     if (tail) {
1143         if (is_zero && i <= tail) {
1144             /* treat unallocated areas which only consist
1145              * of a small tail as allocated. */
1146             is_zero = false;
1147         }
1148         if (!is_zero) {
1149             /* align up end offset of allocated areas. */
1150             i += alignment - tail;
1151             i = MIN(i, n);
1152         } else {
1153             /* align down end offset of zero areas. */
1154             i -= tail;
1155         }
1156     }
1157     *pnum = i;
1158     return !is_zero;
1159 }
1160 
1161 /*
1162  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1163  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1164  * breaking up write requests for only small sparse areas.
1165  */
1166 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1167     int min, int64_t sector_num, int alignment)
1168 {
1169     int ret;
1170     int num_checked, num_used;
1171 
1172     if (n < min) {
1173         min = n;
1174     }
1175 
1176     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1177     if (!ret) {
1178         return ret;
1179     }
1180 
1181     num_used = *pnum;
1182     buf += BDRV_SECTOR_SIZE * *pnum;
1183     n -= *pnum;
1184     sector_num += *pnum;
1185     num_checked = num_used;
1186 
1187     while (n > 0) {
1188         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1189 
1190         buf += BDRV_SECTOR_SIZE * *pnum;
1191         n -= *pnum;
1192         sector_num += *pnum;
1193         num_checked += *pnum;
1194         if (ret) {
1195             num_used = num_checked;
1196         } else if (*pnum >= min) {
1197             break;
1198         }
1199     }
1200 
1201     *pnum = num_used;
1202     return 1;
1203 }
1204 
1205 /*
1206  * Compares two buffers sector by sector. Returns 0 if the first
1207  * sector of each buffer matches, non-zero otherwise.
1208  *
1209  * pnum is set to the sector-aligned size of the buffer prefix that
1210  * has the same matching status as the first sector.
1211  */
1212 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1213                            int64_t bytes, int64_t *pnum)
1214 {
1215     bool res;
1216     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1217 
1218     assert(bytes > 0);
1219 
1220     res = !!memcmp(buf1, buf2, i);
1221     while (i < bytes) {
1222         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1223 
1224         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1225             break;
1226         }
1227         i += len;
1228     }
1229 
1230     *pnum = i;
1231     return res;
1232 }
1233 
/* Size of the I/O buffers allocated by img_compare(); also the upper bound
 * for the number of bytes it reads in one request. */
#define IO_BUF_SIZE (2 * MiB)
1235 
1236 /*
1237  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1238  *
1239  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1240  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1241  * failure), and 4 on error (the exit status for read errors), after emitting
1242  * an error message.
1243  *
1244  * @param blk:  BlockBackend for the image
1245  * @param offset: Starting offset to check
1246  * @param bytes: Number of bytes to check
1247  * @param filename: Name of disk file we are checking (logging purpose)
1248  * @param buffer: Allocated buffer for storing read data
1249  * @param quiet: Flag for quiet mode
1250  */
1251 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1252                                int64_t bytes, const char *filename,
1253                                uint8_t *buffer, bool quiet)
1254 {
1255     int ret = 0;
1256     int64_t idx;
1257 
1258     ret = blk_pread(blk, offset, buffer, bytes);
1259     if (ret < 0) {
1260         error_report("Error while reading offset %" PRId64 " of %s: %s",
1261                      offset, filename, strerror(-ret));
1262         return 4;
1263     }
1264     idx = find_nonzero(buffer, bytes);
1265     if (idx >= 0) {
1266         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1267                 offset + idx);
1268         return 1;
1269     }
1270 
1271     return 0;
1272 }
1273 
/*
 * Compares two images. Exit codes:
 *
 * 0 - Images are identical
 * 1 - Images differ
 * >1 - Error occurred
 *
 * The error codes produced by this function are 2 for option, object or
 * image open failures, 3 when querying the block status fails and 4 when
 * getting an image size or reading image data fails.
 *
 * Options parsed below: -f fmt1, -F fmt2, -T cache, -p, -q, -s (strict),
 * -U/--force-share, --object and --image-opts.  Exactly two image file names
 * are expected.
 */
static int img_compare(int argc, char **argv)
{
    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
    BlockBackend *blk1, *blk2;
    BlockDriverState *bs1, *bs2;
    int64_t total_size1, total_size2;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    int64_t pnum1, pnum2;
    int allocated1, allocated2;
    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
    bool progress = false, quiet = false, strict = false;
    int flags;
    bool writethrough;
    int64_t total_size;
    int64_t offset = 0;
    int64_t chunk;
    int c;
    uint64_t progress_base;
    bool image_opts = false;
    bool force_share = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        /* The leading ':' makes getopt return ':' for a missing argument */
        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch (c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt1 = optarg;
            break;
        case 'F':
            fmt2 = optarg;
            break;
        case 'T':
            cache = optarg;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case 's':
            strict = true;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                ret = 2;
                goto out4;
            }
        }   break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }


    if (optind != argc - 2) {
        error_exit("Expecting two image file names");
    }
    filename1 = argv[optind++];
    filename2 = argv[optind++];

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          qemu_img_object_print_help, &error_fatal)) {
        ret = 2;
        goto out4;
    }

    /* Initialize before goto out */
    qemu_progress_init(progress, 2.0);

    flags = 0;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", cache);
        ret = 2;
        goto out3;
    }

    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
                    force_share);
    if (!blk1) {
        ret = 2;
        goto out3;
    }

    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
                    force_share);
    if (!blk2) {
        ret = 2;
        goto out2;
    }
    bs1 = blk_bs(blk1);
    bs2 = blk_bs(blk2);

    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
    total_size1 = blk_getlength(blk1);
    if (total_size1 < 0) {
        error_report("Can't get size of %s: %s",
                     filename1, strerror(-total_size1));
        ret = 4;
        goto out;
    }
    total_size2 = blk_getlength(blk2);
    if (total_size2 < 0) {
        error_report("Can't get size of %s: %s",
                     filename2, strerror(-total_size2));
        ret = 4;
        goto out;
    }
    /* total_size is the part both images have in common; progress_base is
     * the size of the larger image and the divisor for progress output. */
    total_size = MIN(total_size1, total_size2);
    progress_base = MAX(total_size1, total_size2);

    qemu_progress_print(0, 100);

    if (strict && total_size1 != total_size2) {
        ret = 1;
        qprintf(quiet, "Strict mode: Image size mismatch!\n");
        goto out;
    }

    /* Pass 1: compare the range covered by both images, one chunk with
     * uniform block status in both images at a time. */
    while (offset < total_size) {
        int status1, status2;

        status1 = bdrv_block_status_above(bs1, NULL, offset,
                                          total_size1 - offset, &pnum1, NULL,
                                          NULL);
        if (status1 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename1);
            goto out;
        }
        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;

        status2 = bdrv_block_status_above(bs2, NULL, offset,
                                          total_size2 - offset, &pnum2, NULL,
                                          NULL);
        if (status2 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename2);
            goto out;
        }
        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;

        assert(pnum1 && pnum2);
        chunk = MIN(pnum1, pnum2);

        if (strict) {
            if (status1 != status2) {
                ret = 1;
                qprintf(quiet, "Strict mode: Offset %" PRId64
                        " block status mismatch!\n", offset);
                goto out;
            }
        }
        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
            /* nothing to do */
        } else if (allocated1 == allocated2) {
            /* Both allocated: compare the data.  Both unallocated: skip. */
            if (allocated1) {
                int64_t pnum;

                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = blk_pread(blk1, offset, buf1, chunk);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename1, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = blk_pread(blk2, offset, buf2, chunk);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename2, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = compare_buffers(buf1, buf2, chunk, &pnum);
                if (ret || pnum != chunk) {
                    /* Either the first sector differs (offset itself) or the
                     * first difference is pnum bytes into the chunk. */
                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                            offset + (ret ? 0 : pnum));
                    ret = 1;
                    goto out;
                }
            }
        } else {
            /* Only one image is allocated here: it must read as zeroes.
             * buf1 serves as the scratch buffer for either image. */
            chunk = MIN(chunk, IO_BUF_SIZE);
            if (allocated1) {
                ret = check_empty_sectors(blk1, offset, chunk,
                                          filename1, buf1, quiet);
            } else {
                ret = check_empty_sectors(blk2, offset, chunk,
                                          filename2, buf1, quiet);
            }
            if (ret) {
                goto out;
            }
        }
        offset += chunk;
        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
    }

    /* Pass 2: the part of the larger image beyond the end of the smaller
     * one must contain only zeroes for the images to count as identical. */
    if (total_size1 != total_size2) {
        BlockBackend *blk_over;
        const char *filename_over;

        qprintf(quiet, "Warning: Image size mismatch!\n");
        if (total_size1 > total_size2) {
            blk_over = blk1;
            filename_over = filename1;
        } else {
            blk_over = blk2;
            filename_over = filename2;
        }

        while (offset < progress_base) {
            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                          progress_base - offset, &chunk,
                                          NULL, NULL);
            if (ret < 0) {
                ret = 3;
                error_report("Sector allocation test failed for %s",
                             filename_over);
                goto out;

            }
            /* Only allocated ranges not already known to be zero need to be
             * read. */
            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = check_empty_sectors(blk_over, offset, chunk,
                                          filename_over, buf1, quiet);
                if (ret) {
                    goto out;
                }
            }
            offset += chunk;
            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
        }
    }

    qprintf(quiet, "Images are identical.\n");
    ret = 0;

    /* Cleanup ladder: each label releases what had been acquired by the
     * time the corresponding goto could be taken. */
out:
    qemu_vfree(buf1);
    qemu_vfree(buf2);
    blk_unref(blk2);
out2:
    blk_unref(blk1);
out3:
    qemu_progress_end();
out4:
    return ret;
}
1568 
/*
 * How a range of source sectors is to be treated during conversion; see
 * convert_iteration_sectors() and convert_co_write().
 */
enum ImgConvertBlockStatus {
    BLK_DATA,           /* data is written, unless it is found to be zero */
    BLK_ZERO,           /* zeroes are written, or skipped if has_zero_init */
    BLK_BACKING_FILE,   /* nothing is written; requires a target backing file */
};
1574 
/* Number of slots in the per-coroutine arrays of ImgConvertState */
#define MAX_COROUTINES 16

/*
 * State shared by the conversion helpers (convert_select_part(),
 * convert_iteration_sectors(), convert_co_read(), convert_co_write(), ...).
 */
typedef struct ImgConvertState {
    BlockBackend **src;             /* source images, src_num entries */
    int64_t *src_sectors;           /* length in sectors of each source */
    int src_num;
    int64_t total_sectors;          /* NOTE(review): presumably the sum of
                                     * src_sectors[] - confirm at the caller */
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    enum ImgConvertBlockStatus status;  /* cached by
                                         * convert_iteration_sectors() */
    int64_t sector_next_status;     /* 'status' is valid for sectors below
                                     * this one */
    BlockBackend *target;
    bool has_zero_init;             /* if set, zero areas are not written */
    bool compressed;                /* write with BDRV_REQ_WRITE_COMPRESSED */
    bool unallocated_blocks_are_zero;
    bool target_is_new;
    bool target_has_backing;
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    bool salvage;                   /* carry on after block status or read
                                     * errors instead of failing */
    bool quiet;                     /* suppress the salvage warnings */
    int min_sparse;                 /* 'min' for is_allocated_sectors_min();
                                     * 0 means data is always written */
    int alignment;                  /* passed to is_allocated_sectors_min() */
    size_t cluster_sectors;         /* granularity used when 'compressed' */
    size_t buf_sectors;             /* upper bound for one data request */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1610 
1611 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1612                                 int *src_cur, int64_t *src_cur_offset)
1613 {
1614     *src_cur = 0;
1615     *src_cur_offset = 0;
1616     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1617         *src_cur_offset += s->src_sectors[*src_cur];
1618         (*src_cur)++;
1619         assert(*src_cur < s->src_num);
1620     }
1621 }
1622 
/*
 * Works out how many sectors, starting at 'sector_num', the next conversion
 * step should cover, and records in s->status how that range is to be
 * treated (BLK_DATA, BLK_ZERO or BLK_BACKING_FILE).
 *
 * The block status of the source is only queried once 'sector_num' has
 * reached s->sector_next_status; before that the cached s->status is reused.
 *
 * Returns the number of sectors, or the negative value returned by the block
 * status query if that fails and s->salvage is not set.
 */
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t src_cur_offset;
    int ret, n, src_cur;
    bool post_backing_zero = false;

    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->target_backing_sectors >= 0) {
        if (sector_num >= s->target_backing_sectors) {
            post_backing_zero = s->unallocated_blocks_are_zero;
        } else if (sector_num + n > s->target_backing_sectors) {
            /* Split requests around target_backing_sectors (because
             * starting from there, zeros are handled differently) */
            n = s->target_backing_sectors - sector_num;
        }
    }

    if (s->sector_next_status <= sector_num) {
        /* Byte offset within the selected source image */
        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
        int64_t count;

        do {
            count = n * BDRV_SECTOR_SIZE;

            /* With a target backing file only the top layer of the source is
             * queried; otherwise the whole backing chain is. */
            if (s->target_has_backing) {
                ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
                                        count, &count, NULL, NULL);
            } else {
                ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
                                              offset, count, &count, NULL,
                                              NULL);
            }

            if (ret < 0) {
                if (s->salvage) {
                    if (n == 1) {
                        if (!s->quiet) {
                            warn_report("error while reading block status at "
                                        "offset %" PRIu64 ": %s", offset,
                                        strerror(-ret));
                        }
                        /* Just try to read the data, then */
                        ret = BDRV_BLOCK_DATA;
                        count = BDRV_SECTOR_SIZE;
                    } else {
                        /* Retry on a shorter range */
                        n = DIV_ROUND_UP(n, 4);
                    }
                } else {
                    error_report("error while reading block status at offset "
                                 "%" PRIu64 ": %s", offset, strerror(-ret));
                    return ret;
                }
            }
        } while (ret < 0);

        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);

        if (ret & BDRV_BLOCK_ZERO) {
            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else {
            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
        }

        /* Cache the result: s->status holds up to this sector */
        s->sector_next_status = sector_num + n;
    }

    n = MIN(n, s->sector_next_status - sector_num);
    if (s->status == BLK_DATA) {
        /* Data has to fit into the I/O buffer */
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}
1715 
1716 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1717                                         int nb_sectors, uint8_t *buf)
1718 {
1719     uint64_t single_read_until = 0;
1720     int n, ret;
1721 
1722     assert(nb_sectors <= s->buf_sectors);
1723     while (nb_sectors > 0) {
1724         BlockBackend *blk;
1725         int src_cur;
1726         int64_t bs_sectors, src_cur_offset;
1727         uint64_t offset;
1728 
1729         /* In the case of compression with multiple source files, we can get a
1730          * nb_sectors that spreads into the next part. So we must be able to
1731          * read across multiple BDSes for one convert_read() call. */
1732         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1733         blk = s->src[src_cur];
1734         bs_sectors = s->src_sectors[src_cur];
1735 
1736         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1737 
1738         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1739         if (single_read_until > offset) {
1740             n = 1;
1741         }
1742 
1743         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1744         if (ret < 0) {
1745             if (s->salvage) {
1746                 if (n > 1) {
1747                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1748                     continue;
1749                 } else {
1750                     if (!s->quiet) {
1751                         warn_report("error while reading offset %" PRIu64
1752                                     ": %s", offset, strerror(-ret));
1753                     }
1754                     memset(buf, 0, BDRV_SECTOR_SIZE);
1755                 }
1756             } else {
1757                 return ret;
1758             }
1759         }
1760 
1761         sector_num += n;
1762         nb_sectors -= n;
1763         buf += n * BDRV_SECTOR_SIZE;
1764     }
1765 
1766     return 0;
1767 }
1768 
1769 
1770 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1771                                          int nb_sectors, uint8_t *buf,
1772                                          enum ImgConvertBlockStatus status)
1773 {
1774     int ret;
1775 
1776     while (nb_sectors > 0) {
1777         int n = nb_sectors;
1778         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1779 
1780         switch (status) {
1781         case BLK_BACKING_FILE:
1782             /* If we have a backing file, leave clusters unallocated that are
1783              * unallocated in the source image, so that the backing file is
1784              * visible at the respective offset. */
1785             assert(s->target_has_backing);
1786             break;
1787 
1788         case BLK_DATA:
1789             /* If we're told to keep the target fully allocated (-S 0) or there
1790              * is real non-zero data, we must write it. Otherwise we can treat
1791              * it as zero sectors.
1792              * Compressed clusters need to be written as a whole, so in that
1793              * case we can only save the write if the buffer is completely
1794              * zeroed. */
1795             if (!s->min_sparse ||
1796                 (!s->compressed &&
1797                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1798                                           sector_num, s->alignment)) ||
1799                 (s->compressed &&
1800                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1801             {
1802                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1803                                     n << BDRV_SECTOR_BITS, buf, flags);
1804                 if (ret < 0) {
1805                     return ret;
1806                 }
1807                 break;
1808             }
1809             /* fall-through */
1810 
1811         case BLK_ZERO:
1812             if (s->has_zero_init) {
1813                 assert(!s->target_has_backing);
1814                 break;
1815             }
1816             ret = blk_co_pwrite_zeroes(s->target,
1817                                        sector_num << BDRV_SECTOR_BITS,
1818                                        n << BDRV_SECTOR_BITS,
1819                                        BDRV_REQ_MAY_UNMAP);
1820             if (ret < 0) {
1821                 return ret;
1822             }
1823             break;
1824         }
1825 
1826         sector_num += n;
1827         nb_sectors -= n;
1828         buf += n * BDRV_SECTOR_SIZE;
1829     }
1830 
1831     return 0;
1832 }
1833 
1834 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1835                                               int nb_sectors)
1836 {
1837     int n, ret;
1838 
1839     while (nb_sectors > 0) {
1840         BlockBackend *blk;
1841         int src_cur;
1842         int64_t bs_sectors, src_cur_offset;
1843         int64_t offset;
1844 
1845         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1846         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1847         blk = s->src[src_cur];
1848         bs_sectors = s->src_sectors[src_cur];
1849 
1850         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1851 
1852         ret = blk_co_copy_range(blk, offset, s->target,
1853                                 sector_num << BDRV_SECTOR_BITS,
1854                                 n << BDRV_SECTOR_BITS, 0, 0);
1855         if (ret < 0) {
1856             return ret;
1857         }
1858 
1859         sector_num += n;
1860         nb_sectors -= n;
1861     }
1862     return 0;
1863 }
1864 
1865 static void coroutine_fn convert_co_do_copy(void *opaque)
1866 {
1867     ImgConvertState *s = opaque;
1868     uint8_t *buf = NULL;
1869     int ret, i;
1870     int index = -1;
1871 
1872     for (i = 0; i < s->num_coroutines; i++) {
1873         if (s->co[i] == qemu_coroutine_self()) {
1874             index = i;
1875             break;
1876         }
1877     }
1878     assert(index >= 0);
1879 
1880     s->running_coroutines++;
1881     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1882 
1883     while (1) {
1884         int n;
1885         int64_t sector_num;
1886         enum ImgConvertBlockStatus status;
1887         bool copy_range;
1888 
1889         qemu_co_mutex_lock(&s->lock);
1890         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1891             qemu_co_mutex_unlock(&s->lock);
1892             break;
1893         }
1894         n = convert_iteration_sectors(s, s->sector_num);
1895         if (n < 0) {
1896             qemu_co_mutex_unlock(&s->lock);
1897             s->ret = n;
1898             break;
1899         }
1900         /* save current sector and allocation status to local variables */
1901         sector_num = s->sector_num;
1902         status = s->status;
1903         if (!s->min_sparse && s->status == BLK_ZERO) {
1904             n = MIN(n, s->buf_sectors);
1905         }
1906         /* increment global sector counter so that other coroutines can
1907          * already continue reading beyond this request */
1908         s->sector_num += n;
1909         qemu_co_mutex_unlock(&s->lock);
1910 
1911         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1912             s->allocated_done += n;
1913             qemu_progress_print(100.0 * s->allocated_done /
1914                                         s->allocated_sectors, 0);
1915         }
1916 
1917 retry:
1918         copy_range = s->copy_range && s->status == BLK_DATA;
1919         if (status == BLK_DATA && !copy_range) {
1920             ret = convert_co_read(s, sector_num, n, buf);
1921             if (ret < 0) {
1922                 error_report("error while reading sector %" PRId64
1923                              ": %s", sector_num, strerror(-ret));
1924                 s->ret = ret;
1925             }
1926         } else if (!s->min_sparse && status == BLK_ZERO) {
1927             status = BLK_DATA;
1928             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1929         }
1930 
1931         if (s->wr_in_order) {
1932             /* keep writes in order */
1933             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1934                 s->wait_sector_num[index] = sector_num;
1935                 qemu_coroutine_yield();
1936             }
1937             s->wait_sector_num[index] = -1;
1938         }
1939 
1940         if (s->ret == -EINPROGRESS) {
1941             if (copy_range) {
1942                 ret = convert_co_copy_range(s, sector_num, n);
1943                 if (ret) {
1944                     s->copy_range = false;
1945                     goto retry;
1946                 }
1947             } else {
1948                 ret = convert_co_write(s, sector_num, n, buf, status);
1949             }
1950             if (ret < 0) {
1951                 error_report("error while writing sector %" PRId64
1952                              ": %s", sector_num, strerror(-ret));
1953                 s->ret = ret;
1954             }
1955         }
1956 
1957         if (s->wr_in_order) {
1958             /* reenter the coroutine that might have waited
1959              * for this write to complete */
1960             s->wr_offs = sector_num + n;
1961             for (i = 0; i < s->num_coroutines; i++) {
1962                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1963                     /*
1964                      * A -> B -> A cannot occur because A has
1965                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1966                      * B will never enter A during this time window.
1967                      */
1968                     qemu_coroutine_enter(s->co[i]);
1969                     break;
1970                 }
1971             }
1972         }
1973     }
1974 
1975     qemu_vfree(buf);
1976     s->co[index] = NULL;
1977     s->running_coroutines--;
1978     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1979         /* the convert job finished successfully */
1980         s->ret = 0;
1981     }
1982 }
1983 
1984 static int convert_do_copy(ImgConvertState *s)
1985 {
1986     int ret, i, n;
1987     int64_t sector_num = 0;
1988 
1989     /* Check whether we have zero initialisation or can get it efficiently */
1990     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
1991         !s->target_has_backing) {
1992         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1993     }
1994 
1995     if (!s->has_zero_init && !s->target_has_backing &&
1996         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1997     {
1998         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
1999         if (ret == 0) {
2000             s->has_zero_init = true;
2001         }
2002     }
2003 
2004     /* Allocate buffer for copied data. For compressed images, only one cluster
2005      * can be copied at a time. */
2006     if (s->compressed) {
2007         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2008             error_report("invalid cluster size");
2009             return -EINVAL;
2010         }
2011         s->buf_sectors = s->cluster_sectors;
2012     }
2013 
2014     while (sector_num < s->total_sectors) {
2015         n = convert_iteration_sectors(s, sector_num);
2016         if (n < 0) {
2017             return n;
2018         }
2019         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2020         {
2021             s->allocated_sectors += n;
2022         }
2023         sector_num += n;
2024     }
2025 
2026     /* Do the copy */
2027     s->sector_next_status = 0;
2028     s->ret = -EINPROGRESS;
2029 
2030     qemu_co_mutex_init(&s->lock);
2031     for (i = 0; i < s->num_coroutines; i++) {
2032         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2033         s->wait_sector_num[i] = -1;
2034         qemu_coroutine_enter(s->co[i]);
2035     }
2036 
2037     while (s->running_coroutines) {
2038         main_loop_wait(false);
2039     }
2040 
2041     if (s->compressed && !s->ret) {
2042         /* signal EOF to align */
2043         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2044         if (ret < 0) {
2045             return ret;
2046         }
2047     }
2048 
2049     return s->ret;
2050 }
2051 
2052 #define MAX_BUF_SECTORS 32768
2053 
2054 static int img_convert(int argc, char **argv)
2055 {
2056     int c, bs_i, flags, src_flags = 0;
2057     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2058                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2059                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2060     BlockDriver *drv = NULL, *proto_drv = NULL;
2061     BlockDriverInfo bdi;
2062     BlockDriverState *out_bs;
2063     QemuOpts *opts = NULL, *sn_opts = NULL;
2064     QemuOptsList *create_opts = NULL;
2065     QDict *open_opts = NULL;
2066     char *options = NULL;
2067     Error *local_err = NULL;
2068     bool writethrough, src_writethrough, image_opts = false,
2069          skip_create = false, progress = false, tgt_image_opts = false;
2070     int64_t ret = -EINVAL;
2071     bool force_share = false;
2072     bool explict_min_sparse = false;
2073 
2074     ImgConvertState s = (ImgConvertState) {
2075         /* Need at least 4k of zeros for sparse detection */
2076         .min_sparse         = 8,
2077         .copy_range         = false,
2078         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2079         .wr_in_order        = true,
2080         .num_coroutines     = 8,
2081     };
2082 
2083     for(;;) {
2084         static const struct option long_options[] = {
2085             {"help", no_argument, 0, 'h'},
2086             {"object", required_argument, 0, OPTION_OBJECT},
2087             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2088             {"force-share", no_argument, 0, 'U'},
2089             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2090             {"salvage", no_argument, 0, OPTION_SALVAGE},
2091             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2092             {0, 0, 0, 0}
2093         };
2094         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2095                         long_options, NULL);
2096         if (c == -1) {
2097             break;
2098         }
2099         switch(c) {
2100         case ':':
2101             missing_argument(argv[optind - 1]);
2102             break;
2103         case '?':
2104             unrecognized_option(argv[optind - 1]);
2105             break;
2106         case 'h':
2107             help();
2108             break;
2109         case 'f':
2110             fmt = optarg;
2111             break;
2112         case 'O':
2113             out_fmt = optarg;
2114             break;
2115         case 'B':
2116             out_baseimg = optarg;
2117             break;
2118         case 'C':
2119             s.copy_range = true;
2120             break;
2121         case 'c':
2122             s.compressed = true;
2123             break;
2124         case 'o':
2125             if (!is_valid_option_list(optarg)) {
2126                 error_report("Invalid option list: %s", optarg);
2127                 goto fail_getopt;
2128             }
2129             if (!options) {
2130                 options = g_strdup(optarg);
2131             } else {
2132                 char *old_options = options;
2133                 options = g_strdup_printf("%s,%s", options, optarg);
2134                 g_free(old_options);
2135             }
2136             break;
2137         case 'l':
2138             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2139                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2140                                                   optarg, false);
2141                 if (!sn_opts) {
2142                     error_report("Failed in parsing snapshot param '%s'",
2143                                  optarg);
2144                     goto fail_getopt;
2145                 }
2146             } else {
2147                 snapshot_name = optarg;
2148             }
2149             break;
2150         case 'S':
2151         {
2152             int64_t sval;
2153 
2154             sval = cvtnum(optarg);
2155             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2156                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2157                 error_report("Invalid buffer size for sparse output specified. "
2158                     "Valid sizes are multiples of %llu up to %llu. Select "
2159                     "0 to disable sparse detection (fully allocates output).",
2160                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2161                 goto fail_getopt;
2162             }
2163 
2164             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2165             explict_min_sparse = true;
2166             break;
2167         }
2168         case 'p':
2169             progress = true;
2170             break;
2171         case 't':
2172             cache = optarg;
2173             break;
2174         case 'T':
2175             src_cache = optarg;
2176             break;
2177         case 'q':
2178             s.quiet = true;
2179             break;
2180         case 'n':
2181             skip_create = true;
2182             break;
2183         case 'm':
2184             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2185                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2186                 error_report("Invalid number of coroutines. Allowed number of"
2187                              " coroutines is between 1 and %d", MAX_COROUTINES);
2188                 goto fail_getopt;
2189             }
2190             break;
2191         case 'W':
2192             s.wr_in_order = false;
2193             break;
2194         case 'U':
2195             force_share = true;
2196             break;
2197         case OPTION_OBJECT: {
2198             QemuOpts *object_opts;
2199             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2200                                                   optarg, true);
2201             if (!object_opts) {
2202                 goto fail_getopt;
2203             }
2204             break;
2205         }
2206         case OPTION_IMAGE_OPTS:
2207             image_opts = true;
2208             break;
2209         case OPTION_SALVAGE:
2210             s.salvage = true;
2211             break;
2212         case OPTION_TARGET_IMAGE_OPTS:
2213             tgt_image_opts = true;
2214             break;
2215         case OPTION_TARGET_IS_ZERO:
2216             /*
2217              * The user asserting that the target is blank has the
2218              * same effect as the target driver supporting zero
2219              * initialisation.
2220              */
2221             s.has_zero_init = true;
2222             break;
2223         }
2224     }
2225 
2226     if (!out_fmt && !tgt_image_opts) {
2227         out_fmt = "raw";
2228     }
2229 
2230     if (qemu_opts_foreach(&qemu_object_opts,
2231                           user_creatable_add_opts_foreach,
2232                           qemu_img_object_print_help, &error_fatal)) {
2233         goto fail_getopt;
2234     }
2235 
2236     if (s.compressed && s.copy_range) {
2237         error_report("Cannot enable copy offloading when -c is used");
2238         goto fail_getopt;
2239     }
2240 
2241     if (explict_min_sparse && s.copy_range) {
2242         error_report("Cannot enable copy offloading when -S is used");
2243         goto fail_getopt;
2244     }
2245 
2246     if (s.copy_range && s.salvage) {
2247         error_report("Cannot use copy offloading in salvaging mode");
2248         goto fail_getopt;
2249     }
2250 
2251     if (tgt_image_opts && !skip_create) {
2252         error_report("--target-image-opts requires use of -n flag");
2253         goto fail_getopt;
2254     }
2255 
2256     if (skip_create && options) {
2257         warn_report("-o has no effect when skipping image creation");
2258         warn_report("This will become an error in future QEMU versions.");
2259     }
2260 
2261     if (s.has_zero_init && !skip_create) {
2262         error_report("--target-is-zero requires use of -n flag");
2263         goto fail_getopt;
2264     }
2265 
2266     s.src_num = argc - optind - 1;
2267     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2268 
2269     if (options && has_help_option(options)) {
2270         if (out_fmt) {
2271             ret = print_block_option_help(out_filename, out_fmt);
2272             goto fail_getopt;
2273         } else {
2274             error_report("Option help requires a format be specified");
2275             goto fail_getopt;
2276         }
2277     }
2278 
2279     if (s.src_num < 1) {
2280         error_report("Must specify image file name");
2281         goto fail_getopt;
2282     }
2283 
2284 
2285     /* ret is still -EINVAL until here */
2286     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2287     if (ret < 0) {
2288         error_report("Invalid source cache option: %s", src_cache);
2289         goto fail_getopt;
2290     }
2291 
2292     /* Initialize before goto out */
2293     if (s.quiet) {
2294         progress = false;
2295     }
2296     qemu_progress_init(progress, 1.0);
2297     qemu_progress_print(0, 100);
2298 
2299     s.src = g_new0(BlockBackend *, s.src_num);
2300     s.src_sectors = g_new(int64_t, s.src_num);
2301 
2302     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2303         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2304                                fmt, src_flags, src_writethrough, s.quiet,
2305                                force_share);
2306         if (!s.src[bs_i]) {
2307             ret = -1;
2308             goto out;
2309         }
2310         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2311         if (s.src_sectors[bs_i] < 0) {
2312             error_report("Could not get size of %s: %s",
2313                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2314             ret = -1;
2315             goto out;
2316         }
2317         s.total_sectors += s.src_sectors[bs_i];
2318     }
2319 
2320     if (sn_opts) {
2321         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2322                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2323                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2324                                &local_err);
2325     } else if (snapshot_name != NULL) {
2326         if (s.src_num > 1) {
2327             error_report("No support for concatenating multiple snapshot");
2328             ret = -1;
2329             goto out;
2330         }
2331 
2332         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2333                                              &local_err);
2334     }
2335     if (local_err) {
2336         error_reportf_err(local_err, "Failed to load snapshot: ");
2337         ret = -1;
2338         goto out;
2339     }
2340 
2341     if (!skip_create) {
2342         /* Find driver and parse its options */
2343         drv = bdrv_find_format(out_fmt);
2344         if (!drv) {
2345             error_report("Unknown file format '%s'", out_fmt);
2346             ret = -1;
2347             goto out;
2348         }
2349 
2350         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2351         if (!proto_drv) {
2352             error_report_err(local_err);
2353             ret = -1;
2354             goto out;
2355         }
2356 
2357         if (!drv->create_opts) {
2358             error_report("Format driver '%s' does not support image creation",
2359                          drv->format_name);
2360             ret = -1;
2361             goto out;
2362         }
2363 
2364         if (!proto_drv->create_opts) {
2365             error_report("Protocol driver '%s' does not support image creation",
2366                          proto_drv->format_name);
2367             ret = -1;
2368             goto out;
2369         }
2370 
2371         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2372         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2373 
2374         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2375         if (options) {
2376             qemu_opts_do_parse(opts, options, NULL, &local_err);
2377             if (local_err) {
2378                 error_report_err(local_err);
2379                 ret = -1;
2380                 goto out;
2381             }
2382         }
2383 
2384         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2385                             &error_abort);
2386         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2387         if (ret < 0) {
2388             goto out;
2389         }
2390     }
2391 
2392     /* Get backing file name if -o backing_file was used */
2393     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2394     if (out_baseimg_param) {
2395         out_baseimg = out_baseimg_param;
2396     }
2397     s.target_has_backing = (bool) out_baseimg;
2398 
2399     if (s.has_zero_init && s.target_has_backing) {
2400         error_report("Cannot use --target-is-zero when the destination "
2401                      "image has a backing file");
2402         goto out;
2403     }
2404 
2405     if (s.src_num > 1 && out_baseimg) {
2406         error_report("Having a backing file for the target makes no sense when "
2407                      "concatenating multiple input images");
2408         ret = -1;
2409         goto out;
2410     }
2411 
2412     /* Check if compression is supported */
2413     if (s.compressed) {
2414         bool encryption =
2415             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2416         const char *encryptfmt =
2417             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2418         const char *preallocation =
2419             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2420 
2421         if (drv && !block_driver_can_compress(drv)) {
2422             error_report("Compression not supported for this file format");
2423             ret = -1;
2424             goto out;
2425         }
2426 
2427         if (encryption || encryptfmt) {
2428             error_report("Compression and encryption not supported at "
2429                          "the same time");
2430             ret = -1;
2431             goto out;
2432         }
2433 
2434         if (preallocation
2435             && strcmp(preallocation, "off"))
2436         {
2437             error_report("Compression and preallocation not supported at "
2438                          "the same time");
2439             ret = -1;
2440             goto out;
2441         }
2442     }
2443 
2444     /*
2445      * The later open call will need any decryption secrets, and
2446      * bdrv_create() will purge "opts", so extract them now before
2447      * they are lost.
2448      */
2449     if (!skip_create) {
2450         open_opts = qdict_new();
2451         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2452     }
2453 
2454     if (!skip_create) {
2455         /* Create the new image */
2456         ret = bdrv_create(drv, out_filename, opts, &local_err);
2457         if (ret < 0) {
2458             error_reportf_err(local_err, "%s: error while converting %s: ",
2459                               out_filename, out_fmt);
2460             goto out;
2461         }
2462     }
2463 
2464     s.target_is_new = !skip_create;
2465 
2466     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2467     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2468     if (ret < 0) {
2469         error_report("Invalid cache option: %s", cache);
2470         goto out;
2471     }
2472 
2473     if (skip_create) {
2474         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2475                             flags, writethrough, s.quiet, false);
2476     } else {
2477         /* TODO ultimately we should allow --target-image-opts
2478          * to be used even when -n is not given.
2479          * That has to wait for bdrv_create to be improved
2480          * to allow filenames in option syntax
2481          */
2482         s.target = img_open_file(out_filename, open_opts, out_fmt,
2483                                  flags, writethrough, s.quiet, false);
2484         open_opts = NULL; /* blk_new_open will have freed it */
2485     }
2486     if (!s.target) {
2487         ret = -1;
2488         goto out;
2489     }
2490     out_bs = blk_bs(s.target);
2491 
2492     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2493         error_report("Compression not supported for this file format");
2494         ret = -1;
2495         goto out;
2496     }
2497 
2498     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2499      * or discard_alignment of the out_bs is greater. Limit to
2500      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2501     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2502                         MAX(s.buf_sectors,
2503                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2504                                 out_bs->bl.pdiscard_alignment >>
2505                                 BDRV_SECTOR_BITS)));
2506 
2507     /* try to align the write requests to the destination to avoid unnecessary
2508      * RMW cycles. */
2509     s.alignment = MAX(pow2floor(s.min_sparse),
2510                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2511                                    BDRV_SECTOR_SIZE));
2512     assert(is_power_of_2(s.alignment));
2513 
2514     if (skip_create) {
2515         int64_t output_sectors = blk_nb_sectors(s.target);
2516         if (output_sectors < 0) {
2517             error_report("unable to get output image length: %s",
2518                          strerror(-output_sectors));
2519             ret = -1;
2520             goto out;
2521         } else if (output_sectors < s.total_sectors) {
2522             error_report("output file is smaller than input file");
2523             ret = -1;
2524             goto out;
2525         }
2526     }
2527 
2528     if (s.target_has_backing && s.target_is_new) {
2529         /* Errors are treated as "backing length unknown" (which means
2530          * s.target_backing_sectors has to be negative, which it will
2531          * be automatically).  The backing file length is used only
2532          * for optimizations, so such a case is not fatal. */
2533         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2534     } else {
2535         s.target_backing_sectors = -1;
2536     }
2537 
2538     ret = bdrv_get_info(out_bs, &bdi);
2539     if (ret < 0) {
2540         if (s.compressed) {
2541             error_report("could not get block driver info");
2542             goto out;
2543         }
2544     } else {
2545         s.compressed = s.compressed || bdi.needs_compressed_writes;
2546         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2547         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2548     }
2549 
2550     ret = convert_do_copy(&s);
2551 out:
2552     if (!ret) {
2553         qemu_progress_print(100, 0);
2554     }
2555     qemu_progress_end();
2556     qemu_opts_del(opts);
2557     qemu_opts_free(create_opts);
2558     qemu_opts_del(sn_opts);
2559     qobject_unref(open_opts);
2560     blk_unref(s.target);
2561     if (s.src) {
2562         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2563             blk_unref(s.src[bs_i]);
2564         }
2565         g_free(s.src);
2566     }
2567     g_free(s.src_sectors);
2568 fail_getopt:
2569     g_free(options);
2570 
2571     return !!ret;
2572 }
2573 
2574 
2575 static void dump_snapshots(BlockDriverState *bs)
2576 {
2577     QEMUSnapshotInfo *sn_tab, *sn;
2578     int nb_sns, i;
2579 
2580     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2581     if (nb_sns <= 0)
2582         return;
2583     printf("Snapshot list:\n");
2584     bdrv_snapshot_dump(NULL);
2585     printf("\n");
2586     for(i = 0; i < nb_sns; i++) {
2587         sn = &sn_tab[i];
2588         bdrv_snapshot_dump(sn);
2589         printf("\n");
2590     }
2591     g_free(sn_tab);
2592 }
2593 
2594 static void dump_json_image_info_list(ImageInfoList *list)
2595 {
2596     QString *str;
2597     QObject *obj;
2598     Visitor *v = qobject_output_visitor_new(&obj);
2599 
2600     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2601     visit_complete(v, &obj);
2602     str = qobject_to_json_pretty(obj);
2603     assert(str != NULL);
2604     printf("%s\n", qstring_get_str(str));
2605     qobject_unref(obj);
2606     visit_free(v);
2607     qobject_unref(str);
2608 }
2609 
2610 static void dump_json_image_info(ImageInfo *info)
2611 {
2612     QString *str;
2613     QObject *obj;
2614     Visitor *v = qobject_output_visitor_new(&obj);
2615 
2616     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2617     visit_complete(v, &obj);
2618     str = qobject_to_json_pretty(obj);
2619     assert(str != NULL);
2620     printf("%s\n", qstring_get_str(str));
2621     qobject_unref(obj);
2622     visit_free(v);
2623     qobject_unref(str);
2624 }
2625 
2626 static void dump_human_image_info_list(ImageInfoList *list)
2627 {
2628     ImageInfoList *elem;
2629     bool delim = false;
2630 
2631     for (elem = list; elem; elem = elem->next) {
2632         if (delim) {
2633             printf("\n");
2634         }
2635         delim = true;
2636 
2637         bdrv_image_info_dump(elem->value);
2638     }
2639 }
2640 
2641 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2642 {
2643     return strcmp(a, b) == 0;
2644 }
2645 
2646 /**
2647  * Open an image file chain and return an ImageInfoList
2648  *
2649  * @filename: topmost image filename
2650  * @fmt: topmost image format (may be NULL to autodetect)
2651  * @chain: true  - enumerate entire backing file chain
2652  *         false - only topmost image file
2653  *
2654  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2655  * image file.  If there was an error a message will have been printed to
2656  * stderr.
2657  */
2658 static ImageInfoList *collect_image_info_list(bool image_opts,
2659                                               const char *filename,
2660                                               const char *fmt,
2661                                               bool chain, bool force_share)
2662 {
2663     ImageInfoList *head = NULL;
2664     ImageInfoList **last = &head;
2665     GHashTable *filenames;
2666     Error *err = NULL;
2667 
2668     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2669 
2670     while (filename) {
2671         BlockBackend *blk;
2672         BlockDriverState *bs;
2673         ImageInfo *info;
2674         ImageInfoList *elem;
2675 
2676         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2677             error_report("Backing file '%s' creates an infinite loop.",
2678                          filename);
2679             goto err;
2680         }
2681         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2682 
2683         blk = img_open(image_opts, filename, fmt,
2684                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2685                        force_share);
2686         if (!blk) {
2687             goto err;
2688         }
2689         bs = blk_bs(blk);
2690 
2691         bdrv_query_image_info(bs, &info, &err);
2692         if (err) {
2693             error_report_err(err);
2694             blk_unref(blk);
2695             goto err;
2696         }
2697 
2698         elem = g_new0(ImageInfoList, 1);
2699         elem->value = info;
2700         *last = elem;
2701         last = &elem->next;
2702 
2703         blk_unref(blk);
2704 
2705         /* Clear parameters that only apply to the topmost image */
2706         filename = fmt = NULL;
2707         image_opts = false;
2708 
2709         if (chain) {
2710             if (info->has_full_backing_filename) {
2711                 filename = info->full_backing_filename;
2712             } else if (info->has_backing_filename) {
2713                 error_report("Could not determine absolute backing filename,"
2714                              " but backing filename '%s' present",
2715                              info->backing_filename);
2716                 goto err;
2717             }
2718             if (info->has_backing_filename_format) {
2719                 fmt = info->backing_filename_format;
2720             }
2721         }
2722     }
2723     g_hash_table_destroy(filenames);
2724     return head;
2725 
2726 err:
2727     qapi_free_ImageInfoList(head);
2728     g_hash_table_destroy(filenames);
2729     return NULL;
2730 }
2731 
2732 static int img_info(int argc, char **argv)
2733 {
2734     int c;
2735     OutputFormat output_format = OFORMAT_HUMAN;
2736     bool chain = false;
2737     const char *filename, *fmt, *output;
2738     ImageInfoList *list;
2739     bool image_opts = false;
2740     bool force_share = false;
2741 
2742     fmt = NULL;
2743     output = NULL;
2744     for(;;) {
2745         int option_index = 0;
2746         static const struct option long_options[] = {
2747             {"help", no_argument, 0, 'h'},
2748             {"format", required_argument, 0, 'f'},
2749             {"output", required_argument, 0, OPTION_OUTPUT},
2750             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2751             {"object", required_argument, 0, OPTION_OBJECT},
2752             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2753             {"force-share", no_argument, 0, 'U'},
2754             {0, 0, 0, 0}
2755         };
2756         c = getopt_long(argc, argv, ":f:hU",
2757                         long_options, &option_index);
2758         if (c == -1) {
2759             break;
2760         }
2761         switch(c) {
2762         case ':':
2763             missing_argument(argv[optind - 1]);
2764             break;
2765         case '?':
2766             unrecognized_option(argv[optind - 1]);
2767             break;
2768         case 'h':
2769             help();
2770             break;
2771         case 'f':
2772             fmt = optarg;
2773             break;
2774         case 'U':
2775             force_share = true;
2776             break;
2777         case OPTION_OUTPUT:
2778             output = optarg;
2779             break;
2780         case OPTION_BACKING_CHAIN:
2781             chain = true;
2782             break;
2783         case OPTION_OBJECT: {
2784             QemuOpts *opts;
2785             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2786                                            optarg, true);
2787             if (!opts) {
2788                 return 1;
2789             }
2790         }   break;
2791         case OPTION_IMAGE_OPTS:
2792             image_opts = true;
2793             break;
2794         }
2795     }
2796     if (optind != argc - 1) {
2797         error_exit("Expecting one image file name");
2798     }
2799     filename = argv[optind++];
2800 
2801     if (output && !strcmp(output, "json")) {
2802         output_format = OFORMAT_JSON;
2803     } else if (output && !strcmp(output, "human")) {
2804         output_format = OFORMAT_HUMAN;
2805     } else if (output) {
2806         error_report("--output must be used with human or json as argument.");
2807         return 1;
2808     }
2809 
2810     if (qemu_opts_foreach(&qemu_object_opts,
2811                           user_creatable_add_opts_foreach,
2812                           qemu_img_object_print_help, &error_fatal)) {
2813         return 1;
2814     }
2815 
2816     list = collect_image_info_list(image_opts, filename, fmt, chain,
2817                                    force_share);
2818     if (!list) {
2819         return 1;
2820     }
2821 
2822     switch (output_format) {
2823     case OFORMAT_HUMAN:
2824         dump_human_image_info_list(list);
2825         break;
2826     case OFORMAT_JSON:
2827         if (chain) {
2828             dump_json_image_info_list(list);
2829         } else {
2830             dump_json_image_info(list->value);
2831         }
2832         break;
2833     }
2834 
2835     qapi_free_ImageInfoList(list);
2836     return 0;
2837 }
2838 
2839 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2840                           MapEntry *next)
2841 {
2842     switch (output_format) {
2843     case OFORMAT_HUMAN:
2844         if (e->data && !e->has_offset) {
2845             error_report("File contains external, encrypted or compressed clusters.");
2846             return -1;
2847         }
2848         if (e->data && !e->zero) {
2849             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2850                    e->start, e->length,
2851                    e->has_offset ? e->offset : 0,
2852                    e->has_filename ? e->filename : "");
2853         }
2854         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2855          * Modify the flags here to allow more coalescing.
2856          */
2857         if (next && (!next->data || next->zero)) {
2858             next->data = false;
2859             next->zero = true;
2860         }
2861         break;
2862     case OFORMAT_JSON:
2863         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2864                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2865                (e->start == 0 ? "[" : ",\n"),
2866                e->start, e->length, e->depth,
2867                e->zero ? "true" : "false",
2868                e->data ? "true" : "false");
2869         if (e->has_offset) {
2870             printf(", \"offset\": %"PRId64"", e->offset);
2871         }
2872         putchar('}');
2873 
2874         if (!next) {
2875             printf("]\n");
2876         }
2877         break;
2878     }
2879     return 0;
2880 }
2881 
2882 static int get_block_status(BlockDriverState *bs, int64_t offset,
2883                             int64_t bytes, MapEntry *e)
2884 {
2885     int ret;
2886     int depth;
2887     BlockDriverState *file;
2888     bool has_offset;
2889     int64_t map;
2890     char *filename = NULL;
2891 
2892     /* As an optimization, we could cache the current range of unallocated
2893      * clusters in each file of the chain, and avoid querying the same
2894      * range repeatedly.
2895      */
2896 
2897     depth = 0;
2898     for (;;) {
2899         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2900         if (ret < 0) {
2901             return ret;
2902         }
2903         assert(bytes);
2904         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2905             break;
2906         }
2907         bs = backing_bs(bs);
2908         if (bs == NULL) {
2909             ret = 0;
2910             break;
2911         }
2912 
2913         depth++;
2914     }
2915 
2916     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2917 
2918     if (file && has_offset) {
2919         bdrv_refresh_filename(file);
2920         filename = file->filename;
2921     }
2922 
2923     *e = (MapEntry) {
2924         .start = offset,
2925         .length = bytes,
2926         .data = !!(ret & BDRV_BLOCK_DATA),
2927         .zero = !!(ret & BDRV_BLOCK_ZERO),
2928         .offset = map,
2929         .has_offset = has_offset,
2930         .depth = depth,
2931         .has_filename = filename,
2932         .filename = filename,
2933     };
2934 
2935     return 0;
2936 }
2937 
2938 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2939 {
2940     if (curr->length == 0) {
2941         return false;
2942     }
2943     if (curr->zero != next->zero ||
2944         curr->data != next->data ||
2945         curr->depth != next->depth ||
2946         curr->has_filename != next->has_filename ||
2947         curr->has_offset != next->has_offset) {
2948         return false;
2949     }
2950     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2951         return false;
2952     }
2953     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2954         return false;
2955     }
2956     return true;
2957 }
2958 
2959 static int img_map(int argc, char **argv)
2960 {
2961     int c;
2962     OutputFormat output_format = OFORMAT_HUMAN;
2963     BlockBackend *blk;
2964     BlockDriverState *bs;
2965     const char *filename, *fmt, *output;
2966     int64_t length;
2967     MapEntry curr = { .length = 0 }, next;
2968     int ret = 0;
2969     bool image_opts = false;
2970     bool force_share = false;
2971 
2972     fmt = NULL;
2973     output = NULL;
2974     for (;;) {
2975         int option_index = 0;
2976         static const struct option long_options[] = {
2977             {"help", no_argument, 0, 'h'},
2978             {"format", required_argument, 0, 'f'},
2979             {"output", required_argument, 0, OPTION_OUTPUT},
2980             {"object", required_argument, 0, OPTION_OBJECT},
2981             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2982             {"force-share", no_argument, 0, 'U'},
2983             {0, 0, 0, 0}
2984         };
2985         c = getopt_long(argc, argv, ":f:hU",
2986                         long_options, &option_index);
2987         if (c == -1) {
2988             break;
2989         }
2990         switch (c) {
2991         case ':':
2992             missing_argument(argv[optind - 1]);
2993             break;
2994         case '?':
2995             unrecognized_option(argv[optind - 1]);
2996             break;
2997         case 'h':
2998             help();
2999             break;
3000         case 'f':
3001             fmt = optarg;
3002             break;
3003         case 'U':
3004             force_share = true;
3005             break;
3006         case OPTION_OUTPUT:
3007             output = optarg;
3008             break;
3009         case OPTION_OBJECT: {
3010             QemuOpts *opts;
3011             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3012                                            optarg, true);
3013             if (!opts) {
3014                 return 1;
3015             }
3016         }   break;
3017         case OPTION_IMAGE_OPTS:
3018             image_opts = true;
3019             break;
3020         }
3021     }
3022     if (optind != argc - 1) {
3023         error_exit("Expecting one image file name");
3024     }
3025     filename = argv[optind];
3026 
3027     if (output && !strcmp(output, "json")) {
3028         output_format = OFORMAT_JSON;
3029     } else if (output && !strcmp(output, "human")) {
3030         output_format = OFORMAT_HUMAN;
3031     } else if (output) {
3032         error_report("--output must be used with human or json as argument.");
3033         return 1;
3034     }
3035 
3036     if (qemu_opts_foreach(&qemu_object_opts,
3037                           user_creatable_add_opts_foreach,
3038                           qemu_img_object_print_help, &error_fatal)) {
3039         return 1;
3040     }
3041 
3042     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3043     if (!blk) {
3044         return 1;
3045     }
3046     bs = blk_bs(blk);
3047 
3048     if (output_format == OFORMAT_HUMAN) {
3049         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3050     }
3051 
3052     length = blk_getlength(blk);
3053     while (curr.start + curr.length < length) {
3054         int64_t offset = curr.start + curr.length;
3055         int64_t n;
3056 
3057         /* Probe up to 1 GiB at a time.  */
3058         n = MIN(1 * GiB, length - offset);
3059         ret = get_block_status(bs, offset, n, &next);
3060 
3061         if (ret < 0) {
3062             error_report("Could not read file metadata: %s", strerror(-ret));
3063             goto out;
3064         }
3065 
3066         if (entry_mergeable(&curr, &next)) {
3067             curr.length += next.length;
3068             continue;
3069         }
3070 
3071         if (curr.length > 0) {
3072             ret = dump_map_entry(output_format, &curr, &next);
3073             if (ret < 0) {
3074                 goto out;
3075             }
3076         }
3077         curr = next;
3078     }
3079 
3080     ret = dump_map_entry(output_format, &curr, NULL);
3081 
3082 out:
3083     blk_unref(blk);
3084     return ret < 0;
3085 }
3086 
3087 #define SNAPSHOT_LIST   1
3088 #define SNAPSHOT_CREATE 2
3089 #define SNAPSHOT_APPLY  3
3090 #define SNAPSHOT_DELETE 4
3091 
3092 static int img_snapshot(int argc, char **argv)
3093 {
3094     BlockBackend *blk;
3095     BlockDriverState *bs;
3096     QEMUSnapshotInfo sn;
3097     char *filename, *snapshot_name = NULL;
3098     int c, ret = 0, bdrv_oflags;
3099     int action = 0;
3100     qemu_timeval tv;
3101     bool quiet = false;
3102     Error *err = NULL;
3103     bool image_opts = false;
3104     bool force_share = false;
3105 
3106     bdrv_oflags = BDRV_O_RDWR;
3107     /* Parse commandline parameters */
3108     for(;;) {
3109         static const struct option long_options[] = {
3110             {"help", no_argument, 0, 'h'},
3111             {"object", required_argument, 0, OPTION_OBJECT},
3112             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3113             {"force-share", no_argument, 0, 'U'},
3114             {0, 0, 0, 0}
3115         };
3116         c = getopt_long(argc, argv, ":la:c:d:hqU",
3117                         long_options, NULL);
3118         if (c == -1) {
3119             break;
3120         }
3121         switch(c) {
3122         case ':':
3123             missing_argument(argv[optind - 1]);
3124             break;
3125         case '?':
3126             unrecognized_option(argv[optind - 1]);
3127             break;
3128         case 'h':
3129             help();
3130             return 0;
3131         case 'l':
3132             if (action) {
3133                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3134                 return 0;
3135             }
3136             action = SNAPSHOT_LIST;
3137             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3138             break;
3139         case 'a':
3140             if (action) {
3141                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3142                 return 0;
3143             }
3144             action = SNAPSHOT_APPLY;
3145             snapshot_name = optarg;
3146             break;
3147         case 'c':
3148             if (action) {
3149                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3150                 return 0;
3151             }
3152             action = SNAPSHOT_CREATE;
3153             snapshot_name = optarg;
3154             break;
3155         case 'd':
3156             if (action) {
3157                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3158                 return 0;
3159             }
3160             action = SNAPSHOT_DELETE;
3161             snapshot_name = optarg;
3162             break;
3163         case 'q':
3164             quiet = true;
3165             break;
3166         case 'U':
3167             force_share = true;
3168             break;
3169         case OPTION_OBJECT: {
3170             QemuOpts *opts;
3171             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3172                                            optarg, true);
3173             if (!opts) {
3174                 return 1;
3175             }
3176         }   break;
3177         case OPTION_IMAGE_OPTS:
3178             image_opts = true;
3179             break;
3180         }
3181     }
3182 
3183     if (optind != argc - 1) {
3184         error_exit("Expecting one image file name");
3185     }
3186     filename = argv[optind++];
3187 
3188     if (qemu_opts_foreach(&qemu_object_opts,
3189                           user_creatable_add_opts_foreach,
3190                           qemu_img_object_print_help, &error_fatal)) {
3191         return 1;
3192     }
3193 
3194     /* Open the image */
3195     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3196                    force_share);
3197     if (!blk) {
3198         return 1;
3199     }
3200     bs = blk_bs(blk);
3201 
3202     /* Perform the requested action */
3203     switch(action) {
3204     case SNAPSHOT_LIST:
3205         dump_snapshots(bs);
3206         break;
3207 
3208     case SNAPSHOT_CREATE:
3209         memset(&sn, 0, sizeof(sn));
3210         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3211 
3212         qemu_gettimeofday(&tv);
3213         sn.date_sec = tv.tv_sec;
3214         sn.date_nsec = tv.tv_usec * 1000;
3215 
3216         ret = bdrv_snapshot_create(bs, &sn);
3217         if (ret) {
3218             error_report("Could not create snapshot '%s': %d (%s)",
3219                 snapshot_name, ret, strerror(-ret));
3220         }
3221         break;
3222 
3223     case SNAPSHOT_APPLY:
3224         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3225         if (ret) {
3226             error_reportf_err(err, "Could not apply snapshot '%s': ",
3227                               snapshot_name);
3228         }
3229         break;
3230 
3231     case SNAPSHOT_DELETE:
3232         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3233         if (ret < 0) {
3234             error_report("Could not delete snapshot '%s': snapshot not "
3235                          "found", snapshot_name);
3236             ret = 1;
3237         } else {
3238             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3239             if (ret < 0) {
3240                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3241                                   snapshot_name);
3242                 ret = 1;
3243             }
3244         }
3245         break;
3246     }
3247 
3248     /* Cleanup */
3249     blk_unref(blk);
3250     if (ret) {
3251         return 1;
3252     }
3253     return 0;
3254 }
3255 
3256 static int img_rebase(int argc, char **argv)
3257 {
3258     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3259     uint8_t *buf_old = NULL;
3260     uint8_t *buf_new = NULL;
3261     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3262     char *filename;
3263     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3264     int c, flags, src_flags, ret;
3265     bool writethrough, src_writethrough;
3266     int unsafe = 0;
3267     bool force_share = false;
3268     int progress = 0;
3269     bool quiet = false;
3270     Error *local_err = NULL;
3271     bool image_opts = false;
3272 
3273     /* Parse commandline parameters */
3274     fmt = NULL;
3275     cache = BDRV_DEFAULT_CACHE;
3276     src_cache = BDRV_DEFAULT_CACHE;
3277     out_baseimg = NULL;
3278     out_basefmt = NULL;
3279     for(;;) {
3280         static const struct option long_options[] = {
3281             {"help", no_argument, 0, 'h'},
3282             {"object", required_argument, 0, OPTION_OBJECT},
3283             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3284             {"force-share", no_argument, 0, 'U'},
3285             {0, 0, 0, 0}
3286         };
3287         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3288                         long_options, NULL);
3289         if (c == -1) {
3290             break;
3291         }
3292         switch(c) {
3293         case ':':
3294             missing_argument(argv[optind - 1]);
3295             break;
3296         case '?':
3297             unrecognized_option(argv[optind - 1]);
3298             break;
3299         case 'h':
3300             help();
3301             return 0;
3302         case 'f':
3303             fmt = optarg;
3304             break;
3305         case 'F':
3306             out_basefmt = optarg;
3307             break;
3308         case 'b':
3309             out_baseimg = optarg;
3310             break;
3311         case 'u':
3312             unsafe = 1;
3313             break;
3314         case 'p':
3315             progress = 1;
3316             break;
3317         case 't':
3318             cache = optarg;
3319             break;
3320         case 'T':
3321             src_cache = optarg;
3322             break;
3323         case 'q':
3324             quiet = true;
3325             break;
3326         case OPTION_OBJECT: {
3327             QemuOpts *opts;
3328             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3329                                            optarg, true);
3330             if (!opts) {
3331                 return 1;
3332             }
3333         }   break;
3334         case OPTION_IMAGE_OPTS:
3335             image_opts = true;
3336             break;
3337         case 'U':
3338             force_share = true;
3339             break;
3340         }
3341     }
3342 
3343     if (quiet) {
3344         progress = 0;
3345     }
3346 
3347     if (optind != argc - 1) {
3348         error_exit("Expecting one image file name");
3349     }
3350     if (!unsafe && !out_baseimg) {
3351         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3352     }
3353     filename = argv[optind++];
3354 
3355     if (qemu_opts_foreach(&qemu_object_opts,
3356                           user_creatable_add_opts_foreach,
3357                           qemu_img_object_print_help, &error_fatal)) {
3358         return 1;
3359     }
3360 
3361     qemu_progress_init(progress, 2.0);
3362     qemu_progress_print(0, 100);
3363 
3364     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3365     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3366     if (ret < 0) {
3367         error_report("Invalid cache option: %s", cache);
3368         goto out;
3369     }
3370 
3371     src_flags = 0;
3372     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3373     if (ret < 0) {
3374         error_report("Invalid source cache option: %s", src_cache);
3375         goto out;
3376     }
3377 
3378     /* The source files are opened read-only, don't care about WCE */
3379     assert((src_flags & BDRV_O_RDWR) == 0);
3380     (void) src_writethrough;
3381 
3382     /*
3383      * Open the images.
3384      *
3385      * Ignore the old backing file for unsafe rebase in case we want to correct
3386      * the reference to a renamed or moved backing file.
3387      */
3388     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3389                    false);
3390     if (!blk) {
3391         ret = -1;
3392         goto out;
3393     }
3394     bs = blk_bs(blk);
3395 
3396     if (out_basefmt != NULL) {
3397         if (bdrv_find_format(out_basefmt) == NULL) {
3398             error_report("Invalid format name: '%s'", out_basefmt);
3399             ret = -1;
3400             goto out;
3401         }
3402     }
3403 
3404     /* For safe rebasing we need to compare old and new backing file */
3405     if (!unsafe) {
3406         QDict *options = NULL;
3407         BlockDriverState *base_bs = backing_bs(bs);
3408 
3409         if (base_bs) {
3410             blk_old_backing = blk_new(qemu_get_aio_context(),
3411                                       BLK_PERM_CONSISTENT_READ,
3412                                       BLK_PERM_ALL);
3413             ret = blk_insert_bs(blk_old_backing, base_bs,
3414                                 &local_err);
3415             if (ret < 0) {
3416                 error_reportf_err(local_err,
3417                                   "Could not reuse old backing file '%s': ",
3418                                   base_bs->filename);
3419                 goto out;
3420             }
3421         } else {
3422             blk_old_backing = NULL;
3423         }
3424 
3425         if (out_baseimg[0]) {
3426             const char *overlay_filename;
3427             char *out_real_path;
3428 
3429             options = qdict_new();
3430             if (out_basefmt) {
3431                 qdict_put_str(options, "driver", out_basefmt);
3432             }
3433             if (force_share) {
3434                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3435             }
3436 
3437             bdrv_refresh_filename(bs);
3438             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3439                                                      : bs->filename;
3440             out_real_path =
3441                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3442                                                              out_baseimg,
3443                                                              &local_err);
3444             if (local_err) {
3445                 qobject_unref(options);
3446                 error_reportf_err(local_err,
3447                                   "Could not resolve backing filename: ");
3448                 ret = -1;
3449                 goto out;
3450             }
3451 
3452             /*
3453              * Find out whether we rebase an image on top of a previous image
3454              * in its chain.
3455              */
3456             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3457             if (prefix_chain_bs) {
3458                 qobject_unref(options);
3459                 g_free(out_real_path);
3460 
3461                 blk_new_backing = blk_new(qemu_get_aio_context(),
3462                                           BLK_PERM_CONSISTENT_READ,
3463                                           BLK_PERM_ALL);
3464                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3465                                     &local_err);
3466                 if (ret < 0) {
3467                     error_reportf_err(local_err,
3468                                       "Could not reuse backing file '%s': ",
3469                                       out_baseimg);
3470                     goto out;
3471                 }
3472             } else {
3473                 blk_new_backing = blk_new_open(out_real_path, NULL,
3474                                                options, src_flags, &local_err);
3475                 g_free(out_real_path);
3476                 if (!blk_new_backing) {
3477                     error_reportf_err(local_err,
3478                                       "Could not open new backing file '%s': ",
3479                                       out_baseimg);
3480                     ret = -1;
3481                     goto out;
3482                 }
3483             }
3484         }
3485     }
3486 
3487     /*
3488      * Check each unallocated cluster in the COW file. If it is unallocated,
3489      * accesses go to the backing file. We must therefore compare this cluster
3490      * in the old and new backing file, and if they differ we need to copy it
3491      * from the old backing file into the COW file.
3492      *
3493      * If qemu-img crashes during this step, no harm is done. The content of
3494      * the image is the same as the original one at any time.
3495      */
3496     if (!unsafe) {
3497         int64_t size;
3498         int64_t old_backing_size = 0;
3499         int64_t new_backing_size = 0;
3500         uint64_t offset;
3501         int64_t n;
3502         float local_progress = 0;
3503 
3504         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3505         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3506 
3507         size = blk_getlength(blk);
3508         if (size < 0) {
3509             error_report("Could not get size of '%s': %s",
3510                          filename, strerror(-size));
3511             ret = -1;
3512             goto out;
3513         }
3514         if (blk_old_backing) {
3515             old_backing_size = blk_getlength(blk_old_backing);
3516             if (old_backing_size < 0) {
3517                 char backing_name[PATH_MAX];
3518 
3519                 bdrv_get_backing_filename(bs, backing_name,
3520                                           sizeof(backing_name));
3521                 error_report("Could not get size of '%s': %s",
3522                              backing_name, strerror(-old_backing_size));
3523                 ret = -1;
3524                 goto out;
3525             }
3526         }
3527         if (blk_new_backing) {
3528             new_backing_size = blk_getlength(blk_new_backing);
3529             if (new_backing_size < 0) {
3530                 error_report("Could not get size of '%s': %s",
3531                              out_baseimg, strerror(-new_backing_size));
3532                 ret = -1;
3533                 goto out;
3534             }
3535         }
3536 
3537         if (size != 0) {
3538             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3539         }
3540 
3541         for (offset = 0; offset < size; offset += n) {
3542             bool buf_old_is_zero = false;
3543 
3544             /* How many bytes can we handle with the next read? */
3545             n = MIN(IO_BUF_SIZE, size - offset);
3546 
3547             /* If the cluster is allocated, we don't need to take action */
3548             ret = bdrv_is_allocated(bs, offset, n, &n);
3549             if (ret < 0) {
3550                 error_report("error while reading image metadata: %s",
3551                              strerror(-ret));
3552                 goto out;
3553             }
3554             if (ret) {
3555                 continue;
3556             }
3557 
3558             if (prefix_chain_bs) {
3559                 /*
3560                  * If cluster wasn't changed since prefix_chain, we don't need
3561                  * to take action
3562                  */
3563                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3564                                               false, offset, n, &n);
3565                 if (ret < 0) {
3566                     error_report("error while reading image metadata: %s",
3567                                  strerror(-ret));
3568                     goto out;
3569                 }
3570                 if (!ret) {
3571                     continue;
3572                 }
3573             }
3574 
3575             /*
3576              * Read old and new backing file and take into consideration that
3577              * backing files may be smaller than the COW image.
3578              */
3579             if (offset >= old_backing_size) {
3580                 memset(buf_old, 0, n);
3581                 buf_old_is_zero = true;
3582             } else {
3583                 if (offset + n > old_backing_size) {
3584                     n = old_backing_size - offset;
3585                 }
3586 
3587                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3588                 if (ret < 0) {
3589                     error_report("error while reading from old backing file");
3590                     goto out;
3591                 }
3592             }
3593 
3594             if (offset >= new_backing_size || !blk_new_backing) {
3595                 memset(buf_new, 0, n);
3596             } else {
3597                 if (offset + n > new_backing_size) {
3598                     n = new_backing_size - offset;
3599                 }
3600 
3601                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3602                 if (ret < 0) {
3603                     error_report("error while reading from new backing file");
3604                     goto out;
3605                 }
3606             }
3607 
3608             /* If they differ, we need to write to the COW file */
3609             uint64_t written = 0;
3610 
3611             while (written < n) {
3612                 int64_t pnum;
3613 
3614                 if (compare_buffers(buf_old + written, buf_new + written,
3615                                     n - written, &pnum))
3616                 {
3617                     if (buf_old_is_zero) {
3618                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3619                     } else {
3620                         ret = blk_pwrite(blk, offset + written,
3621                                          buf_old + written, pnum, 0);
3622                     }
3623                     if (ret < 0) {
3624                         error_report("Error while writing to COW image: %s",
3625                             strerror(-ret));
3626                         goto out;
3627                     }
3628                 }
3629 
3630                 written += pnum;
3631             }
3632             qemu_progress_print(local_progress, 100);
3633         }
3634     }
3635 
3636     /*
3637      * Change the backing file. All clusters that are different from the old
3638      * backing file are overwritten in the COW file now, so the visible content
3639      * doesn't change when we switch the backing file.
3640      */
3641     if (out_baseimg && *out_baseimg) {
3642         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3643     } else {
3644         ret = bdrv_change_backing_file(bs, NULL, NULL);
3645     }
3646 
3647     if (ret == -ENOSPC) {
3648         error_report("Could not change the backing file to '%s': No "
3649                      "space left in the file header", out_baseimg);
3650     } else if (ret < 0) {
3651         error_report("Could not change the backing file to '%s': %s",
3652             out_baseimg, strerror(-ret));
3653     }
3654 
3655     qemu_progress_print(100, 0);
3656     /*
3657      * TODO At this point it is possible to check if any clusters that are
3658      * allocated in the COW file are the same in the backing file. If so, they
3659      * could be dropped from the COW file. Don't do this before switching the
3660      * backing file, in case of a crash this would lead to corruption.
3661      */
3662 out:
3663     qemu_progress_end();
3664     /* Cleanup */
3665     if (!unsafe) {
3666         blk_unref(blk_old_backing);
3667         blk_unref(blk_new_backing);
3668     }
3669     qemu_vfree(buf_old);
3670     qemu_vfree(buf_new);
3671 
3672     blk_unref(blk);
3673     if (ret) {
3674         return 1;
3675     }
3676     return 0;
3677 }
3678 
3679 static int img_resize(int argc, char **argv)
3680 {
3681     Error *err = NULL;
3682     int c, ret, relative;
3683     const char *filename, *fmt, *size;
3684     int64_t n, total_size, current_size;
3685     bool quiet = false;
3686     BlockBackend *blk = NULL;
3687     PreallocMode prealloc = PREALLOC_MODE_OFF;
3688     QemuOpts *param;
3689 
3690     static QemuOptsList resize_options = {
3691         .name = "resize_options",
3692         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3693         .desc = {
3694             {
3695                 .name = BLOCK_OPT_SIZE,
3696                 .type = QEMU_OPT_SIZE,
3697                 .help = "Virtual disk size"
3698             }, {
3699                 /* end of list */
3700             }
3701         },
3702     };
3703     bool image_opts = false;
3704     bool shrink = false;
3705 
3706     /* Remove size from argv manually so that negative numbers are not treated
3707      * as options by getopt. */
3708     if (argc < 3) {
3709         error_exit("Not enough arguments");
3710         return 1;
3711     }
3712 
3713     size = argv[--argc];
3714 
3715     /* Parse getopt arguments */
3716     fmt = NULL;
3717     for(;;) {
3718         static const struct option long_options[] = {
3719             {"help", no_argument, 0, 'h'},
3720             {"object", required_argument, 0, OPTION_OBJECT},
3721             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3722             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3723             {"shrink", no_argument, 0, OPTION_SHRINK},
3724             {0, 0, 0, 0}
3725         };
3726         c = getopt_long(argc, argv, ":f:hq",
3727                         long_options, NULL);
3728         if (c == -1) {
3729             break;
3730         }
3731         switch(c) {
3732         case ':':
3733             missing_argument(argv[optind - 1]);
3734             break;
3735         case '?':
3736             unrecognized_option(argv[optind - 1]);
3737             break;
3738         case 'h':
3739             help();
3740             break;
3741         case 'f':
3742             fmt = optarg;
3743             break;
3744         case 'q':
3745             quiet = true;
3746             break;
3747         case OPTION_OBJECT: {
3748             QemuOpts *opts;
3749             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3750                                            optarg, true);
3751             if (!opts) {
3752                 return 1;
3753             }
3754         }   break;
3755         case OPTION_IMAGE_OPTS:
3756             image_opts = true;
3757             break;
3758         case OPTION_PREALLOCATION:
3759             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3760                                        PREALLOC_MODE__MAX, NULL);
3761             if (prealloc == PREALLOC_MODE__MAX) {
3762                 error_report("Invalid preallocation mode '%s'", optarg);
3763                 return 1;
3764             }
3765             break;
3766         case OPTION_SHRINK:
3767             shrink = true;
3768             break;
3769         }
3770     }
3771     if (optind != argc - 1) {
3772         error_exit("Expecting image file name and size");
3773     }
3774     filename = argv[optind++];
3775 
3776     if (qemu_opts_foreach(&qemu_object_opts,
3777                           user_creatable_add_opts_foreach,
3778                           qemu_img_object_print_help, &error_fatal)) {
3779         return 1;
3780     }
3781 
3782     /* Choose grow, shrink, or absolute resize mode */
3783     switch (size[0]) {
3784     case '+':
3785         relative = 1;
3786         size++;
3787         break;
3788     case '-':
3789         relative = -1;
3790         size++;
3791         break;
3792     default:
3793         relative = 0;
3794         break;
3795     }
3796 
3797     /* Parse size */
3798     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3799     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3800     if (err) {
3801         error_report_err(err);
3802         ret = -1;
3803         qemu_opts_del(param);
3804         goto out;
3805     }
3806     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3807     qemu_opts_del(param);
3808 
3809     blk = img_open(image_opts, filename, fmt,
3810                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3811                    false);
3812     if (!blk) {
3813         ret = -1;
3814         goto out;
3815     }
3816 
3817     current_size = blk_getlength(blk);
3818     if (current_size < 0) {
3819         error_report("Failed to inquire current image length: %s",
3820                      strerror(-current_size));
3821         ret = -1;
3822         goto out;
3823     }
3824 
3825     if (relative) {
3826         total_size = current_size + n * relative;
3827     } else {
3828         total_size = n;
3829     }
3830     if (total_size <= 0) {
3831         error_report("New image size must be positive");
3832         ret = -1;
3833         goto out;
3834     }
3835 
3836     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3837         error_report("Preallocation can only be used for growing images");
3838         ret = -1;
3839         goto out;
3840     }
3841 
3842     if (total_size < current_size && !shrink) {
3843         warn_report("Shrinking an image will delete all data beyond the "
3844                     "shrunken image's end. Before performing such an "
3845                     "operation, make sure there is no important data there.");
3846 
3847         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3848             error_report(
3849               "Use the --shrink option to perform a shrink operation.");
3850             ret = -1;
3851             goto out;
3852         } else {
3853             warn_report("Using the --shrink option will suppress this message. "
3854                         "Note that future versions of qemu-img may refuse to "
3855                         "shrink images without this option.");
3856         }
3857     }
3858 
3859     /*
3860      * The user expects the image to have the desired size after
3861      * resizing, so pass @exact=true.  It is of no use to report
3862      * success when the image has not actually been resized.
3863      */
3864     ret = blk_truncate(blk, total_size, true, prealloc, &err);
3865     if (!ret) {
3866         qprintf(quiet, "Image resized.\n");
3867     } else {
3868         error_report_err(err);
3869     }
3870 out:
3871     blk_unref(blk);
3872     if (ret) {
3873         return 1;
3874     }
3875     return 0;
3876 }
3877 
3878 static void amend_status_cb(BlockDriverState *bs,
3879                             int64_t offset, int64_t total_work_size,
3880                             void *opaque)
3881 {
3882     qemu_progress_print(100.f * offset / total_work_size, 0);
3883 }
3884 
3885 static int print_amend_option_help(const char *format)
3886 {
3887     BlockDriver *drv;
3888 
3889     /* Find driver and parse its options */
3890     drv = bdrv_find_format(format);
3891     if (!drv) {
3892         error_report("Unknown file format '%s'", format);
3893         return 1;
3894     }
3895 
3896     if (!drv->bdrv_amend_options) {
3897         error_report("Format driver '%s' does not support option amendment",
3898                      format);
3899         return 1;
3900     }
3901 
3902     /* Every driver supporting amendment must have create_opts */
3903     assert(drv->create_opts);
3904 
3905     printf("Creation options for '%s':\n", format);
3906     qemu_opts_print_help(drv->create_opts, false);
3907     printf("\nNote that not all of these options may be amendable.\n");
3908     return 0;
3909 }
3910 
3911 static int img_amend(int argc, char **argv)
3912 {
3913     Error *err = NULL;
3914     int c, ret = 0;
3915     char *options = NULL;
3916     QemuOptsList *create_opts = NULL;
3917     QemuOpts *opts = NULL;
3918     const char *fmt = NULL, *filename, *cache;
3919     int flags;
3920     bool writethrough;
3921     bool quiet = false, progress = false;
3922     BlockBackend *blk = NULL;
3923     BlockDriverState *bs = NULL;
3924     bool image_opts = false;
3925 
3926     cache = BDRV_DEFAULT_CACHE;
3927     for (;;) {
3928         static const struct option long_options[] = {
3929             {"help", no_argument, 0, 'h'},
3930             {"object", required_argument, 0, OPTION_OBJECT},
3931             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3932             {0, 0, 0, 0}
3933         };
3934         c = getopt_long(argc, argv, ":ho:f:t:pq",
3935                         long_options, NULL);
3936         if (c == -1) {
3937             break;
3938         }
3939 
3940         switch (c) {
3941         case ':':
3942             missing_argument(argv[optind - 1]);
3943             break;
3944         case '?':
3945             unrecognized_option(argv[optind - 1]);
3946             break;
3947         case 'h':
3948             help();
3949             break;
3950         case 'o':
3951             if (!is_valid_option_list(optarg)) {
3952                 error_report("Invalid option list: %s", optarg);
3953                 ret = -1;
3954                 goto out_no_progress;
3955             }
3956             if (!options) {
3957                 options = g_strdup(optarg);
3958             } else {
3959                 char *old_options = options;
3960                 options = g_strdup_printf("%s,%s", options, optarg);
3961                 g_free(old_options);
3962             }
3963             break;
3964         case 'f':
3965             fmt = optarg;
3966             break;
3967         case 't':
3968             cache = optarg;
3969             break;
3970         case 'p':
3971             progress = true;
3972             break;
3973         case 'q':
3974             quiet = true;
3975             break;
3976         case OPTION_OBJECT:
3977             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3978                                            optarg, true);
3979             if (!opts) {
3980                 ret = -1;
3981                 goto out_no_progress;
3982             }
3983             break;
3984         case OPTION_IMAGE_OPTS:
3985             image_opts = true;
3986             break;
3987         }
3988     }
3989 
3990     if (!options) {
3991         error_exit("Must specify options (-o)");
3992     }
3993 
3994     if (qemu_opts_foreach(&qemu_object_opts,
3995                           user_creatable_add_opts_foreach,
3996                           qemu_img_object_print_help, &error_fatal)) {
3997         ret = -1;
3998         goto out_no_progress;
3999     }
4000 
4001     if (quiet) {
4002         progress = false;
4003     }
4004     qemu_progress_init(progress, 1.0);
4005 
4006     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4007     if (fmt && has_help_option(options)) {
4008         /* If a format is explicitly specified (and possibly no filename is
4009          * given), print option help here */
4010         ret = print_amend_option_help(fmt);
4011         goto out;
4012     }
4013 
4014     if (optind != argc - 1) {
4015         error_report("Expecting one image file name");
4016         ret = -1;
4017         goto out;
4018     }
4019 
4020     flags = BDRV_O_RDWR;
4021     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4022     if (ret < 0) {
4023         error_report("Invalid cache option: %s", cache);
4024         goto out;
4025     }
4026 
4027     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4028                    false);
4029     if (!blk) {
4030         ret = -1;
4031         goto out;
4032     }
4033     bs = blk_bs(blk);
4034 
4035     fmt = bs->drv->format_name;
4036 
4037     if (has_help_option(options)) {
4038         /* If the format was auto-detected, print option help here */
4039         ret = print_amend_option_help(fmt);
4040         goto out;
4041     }
4042 
4043     if (!bs->drv->bdrv_amend_options) {
4044         error_report("Format driver '%s' does not support option amendment",
4045                      fmt);
4046         ret = -1;
4047         goto out;
4048     }
4049 
4050     /* Every driver supporting amendment must have create_opts */
4051     assert(bs->drv->create_opts);
4052 
4053     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4054     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4055     qemu_opts_do_parse(opts, options, NULL, &err);
4056     if (err) {
4057         error_report_err(err);
4058         ret = -1;
4059         goto out;
4060     }
4061 
4062     /* In case the driver does not call amend_status_cb() */
4063     qemu_progress_print(0.f, 0);
4064     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4065     qemu_progress_print(100.f, 0);
4066     if (ret < 0) {
4067         error_report_err(err);
4068         goto out;
4069     }
4070 
4071 out:
4072     qemu_progress_end();
4073 
4074 out_no_progress:
4075     blk_unref(blk);
4076     qemu_opts_del(opts);
4077     qemu_opts_free(create_opts);
4078     g_free(options);
4079 
4080     if (ret) {
4081         return 1;
4082     }
4083     return 0;
4084 }
4085 
/*
 * State of one "qemu-img bench" run; filled in by img_bench() and passed as
 * the opaque pointer to bench_cb().
 */
typedef struct BenchData {
    BlockBackend *blk;      /* backend the requests are issued to */
    uint64_t image_size;    /* image length; the request offset wraps at it */
    bool write;             /* true: issue writes, false: issue reads */
    int bufsize;            /* bytes per request */
    int step;               /* offset increment between submitted requests */
    int nrreq;              /* maximum number of requests in flight */
    int n;                  /* requests that have not completed yet */
    int flush_interval;     /* flush after this many requests; 0 = never */
    bool drain_on_flush;    /* let in-flight requests finish before a flush */
    uint8_t *buf;           /* data buffer of nrreq * bufsize bytes */
    QEMUIOVector *qiov;     /* nrreq vectors, each over one bufsize slice of
                             * buf; bench_cb() passes this pointer (i.e. the
                             * first element) to every request */

    int in_flight;          /* submitted read/write requests not yet done */
    bool in_flush;          /* a flush issued with a drained queue is pending */
    uint64_t offset;        /* image offset of the next request */
} BenchData;
4103 
/*
 * Completion callback for flushes issued without draining the queue (see
 * bench_cb()): a successful flush needs no further action, a failed one
 * terminates the program.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4111 
4112 static void bench_cb(void *opaque, int ret)
4113 {
4114     BenchData *b = opaque;
4115     BlockAIOCB *acb;
4116 
4117     if (ret < 0) {
4118         error_report("Failed request: %s", strerror(-ret));
4119         exit(EXIT_FAILURE);
4120     }
4121 
4122     if (b->in_flush) {
4123         /* Just finished a flush with drained queue: Start next requests */
4124         assert(b->in_flight == 0);
4125         b->in_flush = false;
4126     } else if (b->in_flight > 0) {
4127         int remaining = b->n - b->in_flight;
4128 
4129         b->n--;
4130         b->in_flight--;
4131 
4132         /* Time for flush? Drain queue if requested, then flush */
4133         if (b->flush_interval && remaining % b->flush_interval == 0) {
4134             if (!b->in_flight || !b->drain_on_flush) {
4135                 BlockCompletionFunc *cb;
4136 
4137                 if (b->drain_on_flush) {
4138                     b->in_flush = true;
4139                     cb = bench_cb;
4140                 } else {
4141                     cb = bench_undrained_flush_cb;
4142                 }
4143 
4144                 acb = blk_aio_flush(b->blk, cb, b);
4145                 if (!acb) {
4146                     error_report("Failed to issue flush request");
4147                     exit(EXIT_FAILURE);
4148                 }
4149             }
4150             if (b->drain_on_flush) {
4151                 return;
4152             }
4153         }
4154     }
4155 
4156     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4157         int64_t offset = b->offset;
4158         /* blk_aio_* might look for completed I/Os and kick bench_cb
4159          * again, so make sure this operation is counted by in_flight
4160          * and b->offset is ready for the next submission.
4161          */
4162         b->in_flight++;
4163         b->offset += b->step;
4164         b->offset %= b->image_size;
4165         if (b->write) {
4166             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4167         } else {
4168             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4169         }
4170         if (!acb) {
4171             error_report("Failed to issue request");
4172             exit(EXIT_FAILURE);
4173         }
4174     }
4175 }
4176 
4177 static int img_bench(int argc, char **argv)
4178 {
4179     int c, ret = 0;
4180     const char *fmt = NULL, *filename;
4181     bool quiet = false;
4182     bool image_opts = false;
4183     bool is_write = false;
4184     int count = 75000;
4185     int depth = 64;
4186     int64_t offset = 0;
4187     size_t bufsize = 4096;
4188     int pattern = 0;
4189     size_t step = 0;
4190     int flush_interval = 0;
4191     bool drain_on_flush = true;
4192     int64_t image_size;
4193     BlockBackend *blk = NULL;
4194     BenchData data = {};
4195     int flags = 0;
4196     bool writethrough = false;
4197     struct timeval t1, t2;
4198     int i;
4199     bool force_share = false;
4200     size_t buf_size;
4201 
4202     for (;;) {
4203         static const struct option long_options[] = {
4204             {"help", no_argument, 0, 'h'},
4205             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4206             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4207             {"pattern", required_argument, 0, OPTION_PATTERN},
4208             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4209             {"force-share", no_argument, 0, 'U'},
4210             {0, 0, 0, 0}
4211         };
4212         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4213                         NULL);
4214         if (c == -1) {
4215             break;
4216         }
4217 
4218         switch (c) {
4219         case ':':
4220             missing_argument(argv[optind - 1]);
4221             break;
4222         case '?':
4223             unrecognized_option(argv[optind - 1]);
4224             break;
4225         case 'h':
4226             help();
4227             break;
4228         case 'c':
4229         {
4230             unsigned long res;
4231 
4232             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4233                 error_report("Invalid request count specified");
4234                 return 1;
4235             }
4236             count = res;
4237             break;
4238         }
4239         case 'd':
4240         {
4241             unsigned long res;
4242 
4243             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4244                 error_report("Invalid queue depth specified");
4245                 return 1;
4246             }
4247             depth = res;
4248             break;
4249         }
4250         case 'f':
4251             fmt = optarg;
4252             break;
4253         case 'n':
4254             flags |= BDRV_O_NATIVE_AIO;
4255             break;
4256         case 'i':
4257             ret = bdrv_parse_aio(optarg, &flags);
4258             if (ret < 0) {
4259                 error_report("Invalid aio option: %s", optarg);
4260                 ret = -1;
4261                 goto out;
4262             }
4263             break;
4264         case 'o':
4265         {
4266             offset = cvtnum(optarg);
4267             if (offset < 0) {
4268                 error_report("Invalid offset specified");
4269                 return 1;
4270             }
4271             break;
4272         }
4273             break;
4274         case 'q':
4275             quiet = true;
4276             break;
4277         case 's':
4278         {
4279             int64_t sval;
4280 
4281             sval = cvtnum(optarg);
4282             if (sval < 0 || sval > INT_MAX) {
4283                 error_report("Invalid buffer size specified");
4284                 return 1;
4285             }
4286 
4287             bufsize = sval;
4288             break;
4289         }
4290         case 'S':
4291         {
4292             int64_t sval;
4293 
4294             sval = cvtnum(optarg);
4295             if (sval < 0 || sval > INT_MAX) {
4296                 error_report("Invalid step size specified");
4297                 return 1;
4298             }
4299 
4300             step = sval;
4301             break;
4302         }
4303         case 't':
4304             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4305             if (ret < 0) {
4306                 error_report("Invalid cache mode");
4307                 ret = -1;
4308                 goto out;
4309             }
4310             break;
4311         case 'w':
4312             flags |= BDRV_O_RDWR;
4313             is_write = true;
4314             break;
4315         case 'U':
4316             force_share = true;
4317             break;
4318         case OPTION_PATTERN:
4319         {
4320             unsigned long res;
4321 
4322             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4323                 error_report("Invalid pattern byte specified");
4324                 return 1;
4325             }
4326             pattern = res;
4327             break;
4328         }
4329         case OPTION_FLUSH_INTERVAL:
4330         {
4331             unsigned long res;
4332 
4333             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4334                 error_report("Invalid flush interval specified");
4335                 return 1;
4336             }
4337             flush_interval = res;
4338             break;
4339         }
4340         case OPTION_NO_DRAIN:
4341             drain_on_flush = false;
4342             break;
4343         case OPTION_IMAGE_OPTS:
4344             image_opts = true;
4345             break;
4346         }
4347     }
4348 
4349     if (optind != argc - 1) {
4350         error_exit("Expecting one image file name");
4351     }
4352     filename = argv[argc - 1];
4353 
4354     if (!is_write && flush_interval) {
4355         error_report("--flush-interval is only available in write tests");
4356         ret = -1;
4357         goto out;
4358     }
4359     if (flush_interval && flush_interval < depth) {
4360         error_report("Flush interval can't be smaller than depth");
4361         ret = -1;
4362         goto out;
4363     }
4364 
4365     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4366                    force_share);
4367     if (!blk) {
4368         ret = -1;
4369         goto out;
4370     }
4371 
4372     image_size = blk_getlength(blk);
4373     if (image_size < 0) {
4374         ret = image_size;
4375         goto out;
4376     }
4377 
4378     data = (BenchData) {
4379         .blk            = blk,
4380         .image_size     = image_size,
4381         .bufsize        = bufsize,
4382         .step           = step ?: bufsize,
4383         .nrreq          = depth,
4384         .n              = count,
4385         .offset         = offset,
4386         .write          = is_write,
4387         .flush_interval = flush_interval,
4388         .drain_on_flush = drain_on_flush,
4389     };
4390     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4391            "(starting at offset %" PRId64 ", step size %d)\n",
4392            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4393            data.offset, data.step);
4394     if (flush_interval) {
4395         printf("Sending flush every %d requests\n", flush_interval);
4396     }
4397 
4398     buf_size = data.nrreq * data.bufsize;
4399     data.buf = blk_blockalign(blk, buf_size);
4400     memset(data.buf, pattern, data.nrreq * data.bufsize);
4401 
4402     blk_register_buf(blk, data.buf, buf_size);
4403 
4404     data.qiov = g_new(QEMUIOVector, data.nrreq);
4405     for (i = 0; i < data.nrreq; i++) {
4406         qemu_iovec_init(&data.qiov[i], 1);
4407         qemu_iovec_add(&data.qiov[i],
4408                        data.buf + i * data.bufsize, data.bufsize);
4409     }
4410 
4411     gettimeofday(&t1, NULL);
4412     bench_cb(&data, 0);
4413 
4414     while (data.n > 0) {
4415         main_loop_wait(false);
4416     }
4417     gettimeofday(&t2, NULL);
4418 
4419     printf("Run completed in %3.3f seconds.\n",
4420            (t2.tv_sec - t1.tv_sec)
4421            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4422 
4423 out:
4424     if (data.buf) {
4425         blk_unregister_buf(blk, data.buf);
4426     }
4427     qemu_vfree(data.buf);
4428     blk_unref(blk);
4429 
4430     if (ret) {
4431         return 1;
4432     }
4433     return 0;
4434 }
4435 
/*
 * Bit flags (octal) naming the operands of "qemu-img dd"; img_dd() ORs the
 * flag of every operand it has parsed into DdInfo.flags.
 */
#define C_BS      01
#define C_COUNT   02
#define C_IF      04
#define C_OF      010
#define C_SKIP    020
4441 
/* Operand state of a "qemu-img dd" invocation that is not tied to one file */
struct DdInfo {
    unsigned int flags;     /* C_* bits of the operands given so far */
    int64_t count;          /* value of the count= operand */
};
4446 
/* Per-file state of "qemu-img dd"; one instance each for input and output */
struct DdIo {
    int bsz;    /* Block size */
    char *filename;     /* g_strdup()ed copy of the if= / of= operand */
    uint8_t *buf;
    int64_t offset;     /* for the input, the value of the skip= operand */
};
4453 
/* One entry of the operand table that img_dd() matches "name=value" against */
struct DdOpts {
    const char *name;   /* operand name, the text before the '=' */
    /* Parses the value (the text after the '='); returns 0 on success */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* bit recorded once the operand was parsed */
};
4459 
4460 static int img_dd_bs(const char *arg,
4461                      struct DdIo *in, struct DdIo *out,
4462                      struct DdInfo *dd)
4463 {
4464     int64_t res;
4465 
4466     res = cvtnum(arg);
4467 
4468     if (res <= 0 || res > INT_MAX) {
4469         error_report("invalid number: '%s'", arg);
4470         return 1;
4471     }
4472     in->bsz = out->bsz = res;
4473 
4474     return 0;
4475 }
4476 
4477 static int img_dd_count(const char *arg,
4478                         struct DdIo *in, struct DdIo *out,
4479                         struct DdInfo *dd)
4480 {
4481     dd->count = cvtnum(arg);
4482 
4483     if (dd->count < 0) {
4484         error_report("invalid number: '%s'", arg);
4485         return 1;
4486     }
4487 
4488     return 0;
4489 }
4490 
4491 static int img_dd_if(const char *arg,
4492                      struct DdIo *in, struct DdIo *out,
4493                      struct DdInfo *dd)
4494 {
4495     in->filename = g_strdup(arg);
4496 
4497     return 0;
4498 }
4499 
4500 static int img_dd_of(const char *arg,
4501                      struct DdIo *in, struct DdIo *out,
4502                      struct DdInfo *dd)
4503 {
4504     out->filename = g_strdup(arg);
4505 
4506     return 0;
4507 }
4508 
4509 static int img_dd_skip(const char *arg,
4510                        struct DdIo *in, struct DdIo *out,
4511                        struct DdInfo *dd)
4512 {
4513     in->offset = cvtnum(arg);
4514 
4515     if (in->offset < 0) {
4516         error_report("invalid number: '%s'", arg);
4517         return 1;
4518     }
4519 
4520     return 0;
4521 }
4522 
4523 static int img_dd(int argc, char **argv)
4524 {
4525     int ret = 0;
4526     char *arg = NULL;
4527     char *tmp;
4528     BlockDriver *drv = NULL, *proto_drv = NULL;
4529     BlockBackend *blk1 = NULL, *blk2 = NULL;
4530     QemuOpts *opts = NULL;
4531     QemuOptsList *create_opts = NULL;
4532     Error *local_err = NULL;
4533     bool image_opts = false;
4534     int c, i;
4535     const char *out_fmt = "raw";
4536     const char *fmt = NULL;
4537     int64_t size = 0;
4538     int64_t block_count = 0, out_pos, in_pos;
4539     bool force_share = false;
4540     struct DdInfo dd = {
4541         .flags = 0,
4542         .count = 0,
4543     };
4544     struct DdIo in = {
4545         .bsz = 512, /* Block size is by default 512 bytes */
4546         .filename = NULL,
4547         .buf = NULL,
4548         .offset = 0
4549     };
4550     struct DdIo out = {
4551         .bsz = 512,
4552         .filename = NULL,
4553         .buf = NULL,
4554         .offset = 0
4555     };
4556 
4557     const struct DdOpts options[] = {
4558         { "bs", img_dd_bs, C_BS },
4559         { "count", img_dd_count, C_COUNT },
4560         { "if", img_dd_if, C_IF },
4561         { "of", img_dd_of, C_OF },
4562         { "skip", img_dd_skip, C_SKIP },
4563         { NULL, NULL, 0 }
4564     };
4565     const struct option long_options[] = {
4566         { "help", no_argument, 0, 'h'},
4567         { "object", required_argument, 0, OPTION_OBJECT},
4568         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4569         { "force-share", no_argument, 0, 'U'},
4570         { 0, 0, 0, 0 }
4571     };
4572 
4573     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4574         if (c == EOF) {
4575             break;
4576         }
4577         switch (c) {
4578         case 'O':
4579             out_fmt = optarg;
4580             break;
4581         case 'f':
4582             fmt = optarg;
4583             break;
4584         case ':':
4585             missing_argument(argv[optind - 1]);
4586             break;
4587         case '?':
4588             unrecognized_option(argv[optind - 1]);
4589             break;
4590         case 'h':
4591             help();
4592             break;
4593         case 'U':
4594             force_share = true;
4595             break;
4596         case OPTION_OBJECT:
4597             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4598                 ret = -1;
4599                 goto out;
4600             }
4601             break;
4602         case OPTION_IMAGE_OPTS:
4603             image_opts = true;
4604             break;
4605         }
4606     }
4607 
4608     for (i = optind; i < argc; i++) {
4609         int j;
4610         arg = g_strdup(argv[i]);
4611 
4612         tmp = strchr(arg, '=');
4613         if (tmp == NULL) {
4614             error_report("unrecognized operand %s", arg);
4615             ret = -1;
4616             goto out;
4617         }
4618 
4619         *tmp++ = '\0';
4620 
4621         for (j = 0; options[j].name != NULL; j++) {
4622             if (!strcmp(arg, options[j].name)) {
4623                 break;
4624             }
4625         }
4626         if (options[j].name == NULL) {
4627             error_report("unrecognized operand %s", arg);
4628             ret = -1;
4629             goto out;
4630         }
4631 
4632         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4633             ret = -1;
4634             goto out;
4635         }
4636         dd.flags |= options[j].flag;
4637         g_free(arg);
4638         arg = NULL;
4639     }
4640 
4641     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4642         error_report("Must specify both input and output files");
4643         ret = -1;
4644         goto out;
4645     }
4646 
4647     if (qemu_opts_foreach(&qemu_object_opts,
4648                           user_creatable_add_opts_foreach,
4649                           qemu_img_object_print_help, &error_fatal)) {
4650         ret = -1;
4651         goto out;
4652     }
4653 
4654     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4655                     force_share);
4656 
4657     if (!blk1) {
4658         ret = -1;
4659         goto out;
4660     }
4661 
4662     drv = bdrv_find_format(out_fmt);
4663     if (!drv) {
4664         error_report("Unknown file format");
4665         ret = -1;
4666         goto out;
4667     }
4668     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4669 
4670     if (!proto_drv) {
4671         error_report_err(local_err);
4672         ret = -1;
4673         goto out;
4674     }
4675     if (!drv->create_opts) {
4676         error_report("Format driver '%s' does not support image creation",
4677                      drv->format_name);
4678         ret = -1;
4679         goto out;
4680     }
4681     if (!proto_drv->create_opts) {
4682         error_report("Protocol driver '%s' does not support image creation",
4683                      proto_drv->format_name);
4684         ret = -1;
4685         goto out;
4686     }
4687     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4688     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4689 
4690     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4691 
4692     size = blk_getlength(blk1);
4693     if (size < 0) {
4694         error_report("Failed to get size for '%s'", in.filename);
4695         ret = -1;
4696         goto out;
4697     }
4698 
4699     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4700         dd.count * in.bsz < size) {
4701         size = dd.count * in.bsz;
4702     }
4703 
4704     /* Overflow means the specified offset is beyond input image's size */
4705     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4706                               size < in.bsz * in.offset)) {
4707         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4708     } else {
4709         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4710                             size - in.bsz * in.offset, &error_abort);
4711     }
4712 
4713     ret = bdrv_create(drv, out.filename, opts, &local_err);
4714     if (ret < 0) {
4715         error_reportf_err(local_err,
4716                           "%s: error while creating output image: ",
4717                           out.filename);
4718         ret = -1;
4719         goto out;
4720     }
4721 
4722     /* TODO, we can't honour --image-opts for the target,
4723      * since it needs to be given in a format compatible
4724      * with the bdrv_create() call above which does not
4725      * support image-opts style.
4726      */
4727     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4728                          false, false, false);
4729 
4730     if (!blk2) {
4731         ret = -1;
4732         goto out;
4733     }
4734 
4735     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4736                               size < in.offset * in.bsz)) {
4737         /* We give a warning if the skip option is bigger than the input
4738          * size and create an empty output disk image (i.e. like dd(1)).
4739          */
4740         error_report("%s: cannot skip to specified offset", in.filename);
4741         in_pos = size;
4742     } else {
4743         in_pos = in.offset * in.bsz;
4744     }
4745 
4746     in.buf = g_new(uint8_t, in.bsz);
4747 
4748     for (out_pos = 0; in_pos < size; block_count++) {
4749         int in_ret, out_ret;
4750 
4751         if (in_pos + in.bsz > size) {
4752             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4753         } else {
4754             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4755         }
4756         if (in_ret < 0) {
4757             error_report("error while reading from input image file: %s",
4758                          strerror(-in_ret));
4759             ret = -1;
4760             goto out;
4761         }
4762         in_pos += in_ret;
4763 
4764         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4765 
4766         if (out_ret < 0) {
4767             error_report("error while writing to output image file: %s",
4768                          strerror(-out_ret));
4769             ret = -1;
4770             goto out;
4771         }
4772         out_pos += out_ret;
4773     }
4774 
4775 out:
4776     g_free(arg);
4777     qemu_opts_del(opts);
4778     qemu_opts_free(create_opts);
4779     blk_unref(blk1);
4780     blk_unref(blk2);
4781     g_free(in.filename);
4782     g_free(out.filename);
4783     g_free(in.buf);
4784     g_free(out.buf);
4785 
4786     if (ret) {
4787         return 1;
4788     }
4789     return 0;
4790 }
4791 
4792 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4793 {
4794     QString *str;
4795     QObject *obj;
4796     Visitor *v = qobject_output_visitor_new(&obj);
4797 
4798     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4799     visit_complete(v, &obj);
4800     str = qobject_to_json_pretty(obj);
4801     assert(str != NULL);
4802     printf("%s\n", qstring_get_str(str));
4803     qobject_unref(obj);
4804     visit_free(v);
4805     qobject_unref(str);
4806 }
4807 
4808 static int img_measure(int argc, char **argv)
4809 {
4810     static const struct option long_options[] = {
4811         {"help", no_argument, 0, 'h'},
4812         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4813         {"object", required_argument, 0, OPTION_OBJECT},
4814         {"output", required_argument, 0, OPTION_OUTPUT},
4815         {"size", required_argument, 0, OPTION_SIZE},
4816         {"force-share", no_argument, 0, 'U'},
4817         {0, 0, 0, 0}
4818     };
4819     OutputFormat output_format = OFORMAT_HUMAN;
4820     BlockBackend *in_blk = NULL;
4821     BlockDriver *drv;
4822     const char *filename = NULL;
4823     const char *fmt = NULL;
4824     const char *out_fmt = "raw";
4825     char *options = NULL;
4826     char *snapshot_name = NULL;
4827     bool force_share = false;
4828     QemuOpts *opts = NULL;
4829     QemuOpts *object_opts = NULL;
4830     QemuOpts *sn_opts = NULL;
4831     QemuOptsList *create_opts = NULL;
4832     bool image_opts = false;
4833     uint64_t img_size = UINT64_MAX;
4834     BlockMeasureInfo *info = NULL;
4835     Error *local_err = NULL;
4836     int ret = 1;
4837     int c;
4838 
4839     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4840                             long_options, NULL)) != -1) {
4841         switch (c) {
4842         case '?':
4843         case 'h':
4844             help();
4845             break;
4846         case 'f':
4847             fmt = optarg;
4848             break;
4849         case 'O':
4850             out_fmt = optarg;
4851             break;
4852         case 'o':
4853             if (!is_valid_option_list(optarg)) {
4854                 error_report("Invalid option list: %s", optarg);
4855                 goto out;
4856             }
4857             if (!options) {
4858                 options = g_strdup(optarg);
4859             } else {
4860                 char *old_options = options;
4861                 options = g_strdup_printf("%s,%s", options, optarg);
4862                 g_free(old_options);
4863             }
4864             break;
4865         case 'l':
4866             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4867                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4868                                                   optarg, false);
4869                 if (!sn_opts) {
4870                     error_report("Failed in parsing snapshot param '%s'",
4871                                  optarg);
4872                     goto out;
4873                 }
4874             } else {
4875                 snapshot_name = optarg;
4876             }
4877             break;
4878         case 'U':
4879             force_share = true;
4880             break;
4881         case OPTION_OBJECT:
4882             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4883                                                   optarg, true);
4884             if (!object_opts) {
4885                 goto out;
4886             }
4887             break;
4888         case OPTION_IMAGE_OPTS:
4889             image_opts = true;
4890             break;
4891         case OPTION_OUTPUT:
4892             if (!strcmp(optarg, "json")) {
4893                 output_format = OFORMAT_JSON;
4894             } else if (!strcmp(optarg, "human")) {
4895                 output_format = OFORMAT_HUMAN;
4896             } else {
4897                 error_report("--output must be used with human or json "
4898                              "as argument.");
4899                 goto out;
4900             }
4901             break;
4902         case OPTION_SIZE:
4903         {
4904             int64_t sval;
4905 
4906             sval = cvtnum(optarg);
4907             if (sval < 0) {
4908                 if (sval == -ERANGE) {
4909                     error_report("Image size must be less than 8 EiB!");
4910                 } else {
4911                     error_report("Invalid image size specified! You may use "
4912                                  "k, M, G, T, P or E suffixes for ");
4913                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4914                                  "petabytes and exabytes.");
4915                 }
4916                 goto out;
4917             }
4918             img_size = (uint64_t)sval;
4919         }
4920         break;
4921         }
4922     }
4923 
4924     if (qemu_opts_foreach(&qemu_object_opts,
4925                           user_creatable_add_opts_foreach,
4926                           qemu_img_object_print_help, &error_fatal)) {
4927         goto out;
4928     }
4929 
4930     if (argc - optind > 1) {
4931         error_report("At most one filename argument is allowed.");
4932         goto out;
4933     } else if (argc - optind == 1) {
4934         filename = argv[optind];
4935     }
4936 
4937     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
4938         error_report("--image-opts, -f, and -l require a filename argument.");
4939         goto out;
4940     }
4941     if (filename && img_size != UINT64_MAX) {
4942         error_report("--size N cannot be used together with a filename.");
4943         goto out;
4944     }
4945     if (!filename && img_size == UINT64_MAX) {
4946         error_report("Either --size N or one filename must be specified.");
4947         goto out;
4948     }
4949 
4950     if (filename) {
4951         in_blk = img_open(image_opts, filename, fmt, 0,
4952                           false, false, force_share);
4953         if (!in_blk) {
4954             goto out;
4955         }
4956 
4957         if (sn_opts) {
4958             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4959                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4960                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4961                     &local_err);
4962         } else if (snapshot_name != NULL) {
4963             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4964                     snapshot_name, &local_err);
4965         }
4966         if (local_err) {
4967             error_reportf_err(local_err, "Failed to load snapshot: ");
4968             goto out;
4969         }
4970     }
4971 
4972     drv = bdrv_find_format(out_fmt);
4973     if (!drv) {
4974         error_report("Unknown file format '%s'", out_fmt);
4975         goto out;
4976     }
4977     if (!drv->create_opts) {
4978         error_report("Format driver '%s' does not support image creation",
4979                      drv->format_name);
4980         goto out;
4981     }
4982 
4983     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4984     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4985     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4986     if (options) {
4987         qemu_opts_do_parse(opts, options, NULL, &local_err);
4988         if (local_err) {
4989             error_report_err(local_err);
4990             error_report("Invalid options for file format '%s'", out_fmt);
4991             goto out;
4992         }
4993     }
4994     if (img_size != UINT64_MAX) {
4995         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
4996     }
4997 
4998     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
4999     if (local_err) {
5000         error_report_err(local_err);
5001         goto out;
5002     }
5003 
5004     if (output_format == OFORMAT_HUMAN) {
5005         printf("required size: %" PRIu64 "\n", info->required);
5006         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5007     } else {
5008         dump_json_block_measure_info(info);
5009     }
5010 
5011     ret = 0;
5012 
5013 out:
5014     qapi_free_BlockMeasureInfo(info);
5015     qemu_opts_del(object_opts);
5016     qemu_opts_del(opts);
5017     qemu_opts_del(sn_opts);
5018     qemu_opts_free(create_opts);
5019     g_free(options);
5020     blk_unref(in_blk);
5021     return ret;
5022 }
5023 
5024 static const img_cmd_t img_cmds[] = {
5025 #define DEF(option, callback, arg_string)        \
5026     { option, callback },
5027 #include "qemu-img-cmds.h"
5028 #undef DEF
5029     { NULL, NULL, },
5030 };
5031 
5032 int main(int argc, char **argv)
5033 {
5034     const img_cmd_t *cmd;
5035     const char *cmdname;
5036     Error *local_error = NULL;
5037     char *trace_file = NULL;
5038     int c;
5039     static const struct option long_options[] = {
5040         {"help", no_argument, 0, 'h'},
5041         {"version", no_argument, 0, 'V'},
5042         {"trace", required_argument, NULL, 'T'},
5043         {0, 0, 0, 0}
5044     };
5045 
5046 #ifdef CONFIG_POSIX
5047     signal(SIGPIPE, SIG_IGN);
5048 #endif
5049 
5050     error_init(argv[0]);
5051     module_call_init(MODULE_INIT_TRACE);
5052     qemu_init_exec_dir(argv[0]);
5053 
5054     if (qemu_init_main_loop(&local_error)) {
5055         error_report_err(local_error);
5056         exit(EXIT_FAILURE);
5057     }
5058 
5059     qcrypto_init(&error_fatal);
5060 
5061     module_call_init(MODULE_INIT_QOM);
5062     bdrv_init();
5063     if (argc < 2) {
5064         error_exit("Not enough arguments");
5065     }
5066 
5067     qemu_add_opts(&qemu_object_opts);
5068     qemu_add_opts(&qemu_source_opts);
5069     qemu_add_opts(&qemu_trace_opts);
5070 
5071     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5072         switch (c) {
5073         case ':':
5074             missing_argument(argv[optind - 1]);
5075             return 0;
5076         case '?':
5077             unrecognized_option(argv[optind - 1]);
5078             return 0;
5079         case 'h':
5080             help();
5081             return 0;
5082         case 'V':
5083             printf(QEMU_IMG_VERSION);
5084             return 0;
5085         case 'T':
5086             g_free(trace_file);
5087             trace_file = trace_opt_parse(optarg);
5088             break;
5089         }
5090     }
5091 
5092     cmdname = argv[optind];
5093 
5094     /* reset getopt_long scanning */
5095     argc -= optind;
5096     if (argc < 1) {
5097         return 0;
5098     }
5099     argv += optind;
5100     qemu_reset_optind();
5101 
5102     if (!trace_init_backends()) {
5103         exit(1);
5104     }
5105     trace_init_file(trace_file);
5106     qemu_set_log(LOG_TRACE);
5107 
5108     /* find the command */
5109     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5110         if (!strcmp(cmdname, cmd->name)) {
5111             return cmd->handler(argc, argv);
5112         }
5113     }
5114 
5115     /* not found */
5116     error_exit("Command not found: %s", cmdname);
5117 }
5118