xref: /qemu/qemu-img.c (revision c3e31eaa)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qemu-version.h"
26 #include "qapi/error.h"
27 #include "qapi-visit.h"
28 #include "qapi/qobject-output-visitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qapi/qmp/qjson.h"
31 #include "qemu/cutils.h"
32 #include "qemu/config-file.h"
33 #include "qemu/option.h"
34 #include "qemu/error-report.h"
35 #include "qemu/log.h"
36 #include "qom/object_interfaces.h"
37 #include "sysemu/sysemu.h"
38 #include "sysemu/block-backend.h"
39 #include "block/block_int.h"
40 #include "block/blockjob.h"
41 #include "block/qapi.h"
42 #include "crypto/init.h"
43 #include "trace/control.h"
44 #include <getopt.h>
45 
/*
 * Version and copyright banner, assembled at compile time from the build's
 * version macros.  help() places it at the very top of the usage text.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \
                          "\n" QEMU_COPYRIGHT "\n"
48 
/*
 * Pairs a command name with the function implementing it.  The handler has
 * the same (argc, argv) signature as the img_*() functions in this file.
 * NOTE(review): the dispatch table that uses this type is not visible in this
 * part of the file.
 */
typedef struct img_cmd_t {
    const char *name;                       /* command name */
    int (*handler)(int argc, char **argv);  /* function run for that command */
} img_cmd_t;
53 
/*
 * Values returned by getopt_long() for long options that have no
 * single-character equivalent.  Numbering starts at 256, above every value a
 * plain 'char' option can take; the remaining codes follow consecutively.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,
    OPTION_OBJECT,
    OPTION_IMAGE_OPTS,
    OPTION_PATTERN,
    OPTION_FLUSH_INTERVAL,
    OPTION_NO_DRAIN,
};
63 
/* Rendering selected with --output: machine-readable JSON or plain text. */
typedef enum OutputFormat {
    OFORMAT_JSON = 0,
    OFORMAT_HUMAN = 1,
} OutputFormat;
68 
/*
 * Default to cache=writeback as data integrity is not important for qemu-img.
 * Commands start with this mode and replace it when the user passes an
 * explicit cache option (e.g. -t for commit, -T for check).
 */
#define BDRV_DEFAULT_CACHE "writeback"
71 
/*
 * Callback handed to bdrv_iterate_format(): writes one format name to
 * stdout, preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;

    printf(" %s", name);
}
76 
77 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
78 {
79     va_list ap;
80 
81     error_printf("qemu-img: ");
82 
83     va_start(ap, fmt);
84     error_vprintf(fmt, ap);
85     va_end(ap);
86 
87     error_printf("\nTry 'qemu-img --help' for more information\n");
88     exit(EXIT_FAILURE);
89 }
90 
/*
 * Print the complete usage text followed by the list of supported image
 * formats, then exit with EXIT_SUCCESS.  Never returns.
 *
 * The per-command syntax lines are not written out here: DEF() is defined to
 * expand to the command's argument string and qemu-img-cmds.h is included in
 * the middle of the string literal, so each DEF() entry in that header
 * contributes one line.
 *
 * Please keep in sync with qemu-img.texi
 */
static void QEMU_NORETURN help(void)
{
    const char *help_msg =
           QEMU_IMG_VERSION
           "usage: qemu-img [standard options] command [command options]\n"
           "QEMU disk image utility\n"
           "\n"
           "    '-h', '--help'       display this help and exit\n"
           "    '-V', '--version'    output version information and exit\n"
           "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
           "                         specify tracing options\n"
           "\n"
           "Command syntax:\n"
/* Each DEF() in the included header becomes one indented syntax line. */
#define DEF(option, callback, arg_string)        \
           "  " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
#undef GEN_DOCS
           "\n"
           "Command parameters:\n"
           "  'filename' is a disk image filename\n"
           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
           "    manual page for a description of the object properties. The most common\n"
           "    object type is a 'secret', which is used to supply passwords and/or\n"
           "    encryption keys.\n"
           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
           "  'cache' is the cache mode used to write the output disk image, the valid\n"
           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
           "    'directsync' and 'unsafe' (default for convert)\n"
           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
           "    options are the same as for the 'cache' option\n"
           "  'size' is the disk image size in bytes. Optional suffixes\n"
           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
           "    supported. 'b' is ignored.\n"
           "  'output_filename' is the destination disk image filename\n"
           "  'output_fmt' is the destination format\n"
           "  'options' is a comma separated list of format specific options in a\n"
           "    name=value format. Use -o ? for an overview of the options supported by the\n"
           "    used format\n"
           "  'snapshot_param' is param used for internal snapshot, format\n"
           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
           "    '[ID_OR_NAME]'\n"
           "  'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n"
           "    instead\n"
           "  '-c' indicates that target image must be compressed (qcow format only)\n"
           "  '-u' enables unsafe rebasing. It is assumed that old and new backing file\n"
           "       match exactly. The image doesn't need a working backing file before\n"
           "       rebasing in this case (useful for renaming the backing file)\n"
           "  '-h' with or without a command shows this help and lists the supported formats\n"
           "  '-p' show progress of command (only certain commands)\n"
           "  '-q' use Quiet mode - do not print any output (except errors)\n"
           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
           "       contain only zeros for qemu-img to create a sparse image during\n"
           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
           "       unallocated or zero sectors, and the destination image will always be\n"
           "       fully allocated\n"
           "  '--output' takes the format in which the output must be done (human or json)\n"
           "  '-n' skips the target volume creation (useful if the volume is created\n"
           "       prior to running qemu-img)\n"
           "\n"
           "Parameters to check subcommand:\n"
           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
           "       hiding corruption that has already occurred.\n"
           "\n"
           "Parameters to convert subcommand:\n"
           "  '-m' specifies how many coroutines work in parallel during the convert\n"
           "       process (defaults to 8)\n"
           "  '-W' allow to write to the target out of order rather than sequential\n"
           "\n"
           "Parameters to snapshot subcommand:\n"
           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
           "  '-a' applies a snapshot (revert disk to saved state)\n"
           "  '-c' creates a snapshot\n"
           "  '-d' deletes a snapshot\n"
           "  '-l' lists all snapshots in the given image\n"
           "\n"
           "Parameters to compare subcommand:\n"
           "  '-f' first image format\n"
           "  '-F' second image format\n"
           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
           "\n"
           "Parameters to dd subcommand:\n"
           "  'bs=BYTES' read and write up to BYTES bytes at a time "
           "(default: 512)\n"
           "  'count=N' copy only N input blocks\n"
           "  'if=FILE' read from FILE\n"
           "  'of=FILE' write to FILE\n"
           "  'skip=N' skip N bs-sized blocks at the start of input\n";

    /* The format names are appended on the same line, each preceded by a
     * space (see format_print()). */
    printf("%s\nSupported formats:", help_msg);
    bdrv_iterate_format(format_print, NULL);
    printf("\n");
    exit(EXIT_SUCCESS);
}
189 
/*
 * Option list that collects every --object argument.  Commands parse each
 * occurrence into this list while scanning their options and later walk it
 * with qemu_opts_foreach()/user_creatable_add_opts_foreach.  A bare first
 * value is taken as "qom-type".
 * NOTE(review): the empty .desc presumably means any option name is
 * accepted - confirm against the QemuOpts implementation.
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};
198 
/*
 * Option list named "source"; a bare first value is taken as "file".
 * img_open() looks up a list of this name via qemu_find_opts("source") to
 * parse --image-opts strings.
 * NOTE(review): the registration of this list is not visible in this part of
 * the file - presumably done at start-up.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
207 
208 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
209 {
210     int ret = 0;
211     if (!quiet) {
212         va_list args;
213         va_start(args, fmt);
214         ret = vprintf(fmt, args);
215         va_end(args);
216     }
217     return ret;
218 }
219 
220 
221 static int print_block_option_help(const char *filename, const char *fmt)
222 {
223     BlockDriver *drv, *proto_drv;
224     QemuOptsList *create_opts = NULL;
225     Error *local_err = NULL;
226 
227     /* Find driver and parse its options */
228     drv = bdrv_find_format(fmt);
229     if (!drv) {
230         error_report("Unknown file format '%s'", fmt);
231         return 1;
232     }
233 
234     create_opts = qemu_opts_append(create_opts, drv->create_opts);
235     if (filename) {
236         proto_drv = bdrv_find_protocol(filename, true, &local_err);
237         if (!proto_drv) {
238             error_report_err(local_err);
239             qemu_opts_free(create_opts);
240             return 1;
241         }
242         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
243     }
244 
245     qemu_opts_print_help(create_opts);
246     qemu_opts_free(create_opts);
247     return 0;
248 }
249 
250 
251 static int img_open_password(BlockBackend *blk, const char *filename,
252                              int flags, bool quiet)
253 {
254     BlockDriverState *bs;
255     char password[256];
256 
257     bs = blk_bs(blk);
258     if (bdrv_is_encrypted(bs) && bdrv_key_required(bs) &&
259         !(flags & BDRV_O_NO_IO)) {
260         qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
261         if (qemu_read_password(password, sizeof(password)) < 0) {
262             error_report("No password given");
263             return -1;
264         }
265         if (bdrv_set_key(bs, password) < 0) {
266             error_report("invalid password");
267             return -1;
268         }
269     }
270     return 0;
271 }
272 
273 
274 static BlockBackend *img_open_opts(const char *optstr,
275                                    QemuOpts *opts, int flags, bool writethrough,
276                                    bool quiet)
277 {
278     QDict *options;
279     Error *local_err = NULL;
280     BlockBackend *blk;
281     options = qemu_opts_to_qdict(opts, NULL);
282     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
283     if (!blk) {
284         error_reportf_err(local_err, "Could not open '%s': ", optstr);
285         return NULL;
286     }
287     blk_set_enable_write_cache(blk, !writethrough);
288 
289     if (img_open_password(blk, optstr, flags, quiet) < 0) {
290         blk_unref(blk);
291         return NULL;
292     }
293     return blk;
294 }
295 
296 static BlockBackend *img_open_file(const char *filename,
297                                    const char *fmt, int flags,
298                                    bool writethrough, bool quiet)
299 {
300     BlockBackend *blk;
301     Error *local_err = NULL;
302     QDict *options = NULL;
303 
304     if (fmt) {
305         options = qdict_new();
306         qdict_put(options, "driver", qstring_from_str(fmt));
307     }
308 
309     blk = blk_new_open(filename, NULL, options, flags, &local_err);
310     if (!blk) {
311         error_reportf_err(local_err, "Could not open '%s': ", filename);
312         return NULL;
313     }
314     blk_set_enable_write_cache(blk, !writethrough);
315 
316     if (img_open_password(blk, filename, flags, quiet) < 0) {
317         blk_unref(blk);
318         return NULL;
319     }
320     return blk;
321 }
322 
323 
324 static BlockBackend *img_open(bool image_opts,
325                               const char *filename,
326                               const char *fmt, int flags, bool writethrough,
327                               bool quiet)
328 {
329     BlockBackend *blk;
330     if (image_opts) {
331         QemuOpts *opts;
332         if (fmt) {
333             error_report("--image-opts and --format are mutually exclusive");
334             return NULL;
335         }
336         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
337                                        filename, true);
338         if (!opts) {
339             return NULL;
340         }
341         blk = img_open_opts(filename, opts, flags, writethrough, quiet);
342     } else {
343         blk = img_open_file(filename, fmt, flags, writethrough, quiet);
344     }
345     return blk;
346 }
347 
348 
349 static int add_old_style_options(const char *fmt, QemuOpts *opts,
350                                  const char *base_filename,
351                                  const char *base_fmt)
352 {
353     Error *err = NULL;
354 
355     if (base_filename) {
356         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
357         if (err) {
358             error_report("Backing file not supported for file format '%s'",
359                          fmt);
360             error_free(err);
361             return -1;
362         }
363     }
364     if (base_fmt) {
365         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
366         if (err) {
367             error_report("Backing file format not supported for file "
368                          "format '%s'", fmt);
369             error_free(err);
370             return -1;
371         }
372     }
373     return 0;
374 }
375 
/*
 * Convert a size string to a non-negative 64-bit value via qemu_strtosz().
 *
 * Returns the parsed size, the negative error code from qemu_strtosz() if
 * parsing failed, or -ERANGE if the value does not fit into int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }
    if (parsed > INT64_MAX) {
        return -ERANGE;
    }

    return parsed;
}
390 
391 static int img_create(int argc, char **argv)
392 {
393     int c;
394     uint64_t img_size = -1;
395     const char *fmt = "raw";
396     const char *base_fmt = NULL;
397     const char *filename;
398     const char *base_filename = NULL;
399     char *options = NULL;
400     Error *local_err = NULL;
401     bool quiet = false;
402 
403     for(;;) {
404         static const struct option long_options[] = {
405             {"help", no_argument, 0, 'h'},
406             {"object", required_argument, 0, OPTION_OBJECT},
407             {0, 0, 0, 0}
408         };
409         c = getopt_long(argc, argv, "F:b:f:he6o:q",
410                         long_options, NULL);
411         if (c == -1) {
412             break;
413         }
414         switch(c) {
415         case '?':
416         case 'h':
417             help();
418             break;
419         case 'F':
420             base_fmt = optarg;
421             break;
422         case 'b':
423             base_filename = optarg;
424             break;
425         case 'f':
426             fmt = optarg;
427             break;
428         case 'e':
429             error_report("option -e is deprecated, please use \'-o "
430                   "encryption\' instead!");
431             goto fail;
432         case '6':
433             error_report("option -6 is deprecated, please use \'-o "
434                   "compat6\' instead!");
435             goto fail;
436         case 'o':
437             if (!is_valid_option_list(optarg)) {
438                 error_report("Invalid option list: %s", optarg);
439                 goto fail;
440             }
441             if (!options) {
442                 options = g_strdup(optarg);
443             } else {
444                 char *old_options = options;
445                 options = g_strdup_printf("%s,%s", options, optarg);
446                 g_free(old_options);
447             }
448             break;
449         case 'q':
450             quiet = true;
451             break;
452         case OPTION_OBJECT: {
453             QemuOpts *opts;
454             opts = qemu_opts_parse_noisily(&qemu_object_opts,
455                                            optarg, true);
456             if (!opts) {
457                 goto fail;
458             }
459         }   break;
460         }
461     }
462 
463     /* Get the filename */
464     filename = (optind < argc) ? argv[optind] : NULL;
465     if (options && has_help_option(options)) {
466         g_free(options);
467         return print_block_option_help(filename, fmt);
468     }
469 
470     if (optind >= argc) {
471         error_exit("Expecting image file name");
472     }
473     optind++;
474 
475     if (qemu_opts_foreach(&qemu_object_opts,
476                           user_creatable_add_opts_foreach,
477                           NULL, NULL)) {
478         goto fail;
479     }
480 
481     /* Get image size, if specified */
482     if (optind < argc) {
483         int64_t sval;
484 
485         sval = cvtnum(argv[optind++]);
486         if (sval < 0) {
487             if (sval == -ERANGE) {
488                 error_report("Image size must be less than 8 EiB!");
489             } else {
490                 error_report("Invalid image size specified! You may use k, M, "
491                       "G, T, P or E suffixes for ");
492                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
493                              "petabytes and exabytes.");
494             }
495             goto fail;
496         }
497         img_size = (uint64_t)sval;
498     }
499     if (optind != argc) {
500         error_exit("Unexpected argument: %s", argv[optind]);
501     }
502 
503     bdrv_img_create(filename, fmt, base_filename, base_fmt,
504                     options, img_size, 0, &local_err, quiet);
505     if (local_err) {
506         error_reportf_err(local_err, "%s: ", filename);
507         goto fail;
508     }
509 
510     g_free(options);
511     return 0;
512 
513 fail:
514     g_free(options);
515     return 1;
516 }
517 
518 static void dump_json_image_check(ImageCheck *check, bool quiet)
519 {
520     QString *str;
521     QObject *obj;
522     Visitor *v = qobject_output_visitor_new(&obj);
523 
524     visit_type_ImageCheck(v, NULL, &check, &error_abort);
525     visit_complete(v, &obj);
526     str = qobject_to_json_pretty(obj);
527     assert(str != NULL);
528     qprintf(quiet, "%s\n", qstring_get_str(str));
529     qobject_decref(obj);
530     visit_free(v);
531     QDECREF(str);
532 }
533 
534 static void dump_human_image_check(ImageCheck *check, bool quiet)
535 {
536     if (!(check->corruptions || check->leaks || check->check_errors)) {
537         qprintf(quiet, "No errors were found on the image.\n");
538     } else {
539         if (check->corruptions) {
540             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
541                     "Data may be corrupted, or further writes to the image "
542                     "may corrupt it.\n",
543                     check->corruptions);
544         }
545 
546         if (check->leaks) {
547             qprintf(quiet,
548                     "\n%" PRId64 " leaked clusters were found on the image.\n"
549                     "This means waste of disk space, but no harm to data.\n",
550                     check->leaks);
551         }
552 
553         if (check->check_errors) {
554             qprintf(quiet,
555                     "\n%" PRId64
556                     " internal errors have occurred during the check.\n",
557                     check->check_errors);
558         }
559     }
560 
561     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
562         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
563                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
564                 check->allocated_clusters, check->total_clusters,
565                 check->allocated_clusters * 100.0 / check->total_clusters,
566                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
567                 check->compressed_clusters * 100.0 /
568                 check->allocated_clusters);
569     }
570 
571     if (check->image_end_offset) {
572         qprintf(quiet,
573                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
574     }
575 }
576 
577 static int collect_image_check(BlockDriverState *bs,
578                    ImageCheck *check,
579                    const char *filename,
580                    const char *fmt,
581                    int fix)
582 {
583     int ret;
584     BdrvCheckResult result;
585 
586     ret = bdrv_check(bs, &result, fix);
587     if (ret < 0) {
588         return ret;
589     }
590 
591     check->filename                 = g_strdup(filename);
592     check->format                   = g_strdup(bdrv_get_format_name(bs));
593     check->check_errors             = result.check_errors;
594     check->corruptions              = result.corruptions;
595     check->has_corruptions          = result.corruptions != 0;
596     check->leaks                    = result.leaks;
597     check->has_leaks                = result.leaks != 0;
598     check->corruptions_fixed        = result.corruptions_fixed;
599     check->has_corruptions_fixed    = result.corruptions != 0;
600     check->leaks_fixed              = result.leaks_fixed;
601     check->has_leaks_fixed          = result.leaks != 0;
602     check->image_end_offset         = result.image_end_offset;
603     check->has_image_end_offset     = result.image_end_offset != 0;
604     check->total_clusters           = result.bfi.total_clusters;
605     check->has_total_clusters       = result.bfi.total_clusters != 0;
606     check->allocated_clusters       = result.bfi.allocated_clusters;
607     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
608     check->fragmented_clusters      = result.bfi.fragmented_clusters;
609     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
610     check->compressed_clusters      = result.bfi.compressed_clusters;
611     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
612 
613     return 0;
614 }
615 
616 /*
617  * Checks an image for consistency. Exit codes:
618  *
619  *  0 - Check completed, image is good
620  *  1 - Check not completed because of internal errors
621  *  2 - Check completed, image is corrupted
622  *  3 - Check completed, image has leaked clusters, but is good otherwise
623  * 63 - Checks are not supported by the image format
624  */
625 static int img_check(int argc, char **argv)
626 {
627     int c, ret;
628     OutputFormat output_format = OFORMAT_HUMAN;
629     const char *filename, *fmt, *output, *cache;
630     BlockBackend *blk;
631     BlockDriverState *bs;
632     int fix = 0;
633     int flags = BDRV_O_CHECK;
634     bool writethrough;
635     ImageCheck *check;
636     bool quiet = false;
637     bool image_opts = false;
638 
639     fmt = NULL;
640     output = NULL;
641     cache = BDRV_DEFAULT_CACHE;
642 
643     for(;;) {
644         int option_index = 0;
645         static const struct option long_options[] = {
646             {"help", no_argument, 0, 'h'},
647             {"format", required_argument, 0, 'f'},
648             {"repair", required_argument, 0, 'r'},
649             {"output", required_argument, 0, OPTION_OUTPUT},
650             {"object", required_argument, 0, OPTION_OBJECT},
651             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
652             {0, 0, 0, 0}
653         };
654         c = getopt_long(argc, argv, "hf:r:T:q",
655                         long_options, &option_index);
656         if (c == -1) {
657             break;
658         }
659         switch(c) {
660         case '?':
661         case 'h':
662             help();
663             break;
664         case 'f':
665             fmt = optarg;
666             break;
667         case 'r':
668             flags |= BDRV_O_RDWR;
669 
670             if (!strcmp(optarg, "leaks")) {
671                 fix = BDRV_FIX_LEAKS;
672             } else if (!strcmp(optarg, "all")) {
673                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
674             } else {
675                 error_exit("Unknown option value for -r "
676                            "(expecting 'leaks' or 'all'): %s", optarg);
677             }
678             break;
679         case OPTION_OUTPUT:
680             output = optarg;
681             break;
682         case 'T':
683             cache = optarg;
684             break;
685         case 'q':
686             quiet = true;
687             break;
688         case OPTION_OBJECT: {
689             QemuOpts *opts;
690             opts = qemu_opts_parse_noisily(&qemu_object_opts,
691                                            optarg, true);
692             if (!opts) {
693                 return 1;
694             }
695         }   break;
696         case OPTION_IMAGE_OPTS:
697             image_opts = true;
698             break;
699         }
700     }
701     if (optind != argc - 1) {
702         error_exit("Expecting one image file name");
703     }
704     filename = argv[optind++];
705 
706     if (output && !strcmp(output, "json")) {
707         output_format = OFORMAT_JSON;
708     } else if (output && !strcmp(output, "human")) {
709         output_format = OFORMAT_HUMAN;
710     } else if (output) {
711         error_report("--output must be used with human or json as argument.");
712         return 1;
713     }
714 
715     if (qemu_opts_foreach(&qemu_object_opts,
716                           user_creatable_add_opts_foreach,
717                           NULL, NULL)) {
718         return 1;
719     }
720 
721     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
722     if (ret < 0) {
723         error_report("Invalid source cache option: %s", cache);
724         return 1;
725     }
726 
727     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
728     if (!blk) {
729         return 1;
730     }
731     bs = blk_bs(blk);
732 
733     check = g_new0(ImageCheck, 1);
734     ret = collect_image_check(bs, check, filename, fmt, fix);
735 
736     if (ret == -ENOTSUP) {
737         error_report("This image format does not support checks");
738         ret = 63;
739         goto fail;
740     }
741 
742     if (check->corruptions_fixed || check->leaks_fixed) {
743         int corruptions_fixed, leaks_fixed;
744 
745         leaks_fixed         = check->leaks_fixed;
746         corruptions_fixed   = check->corruptions_fixed;
747 
748         if (output_format == OFORMAT_HUMAN) {
749             qprintf(quiet,
750                     "The following inconsistencies were found and repaired:\n\n"
751                     "    %" PRId64 " leaked clusters\n"
752                     "    %" PRId64 " corruptions\n\n"
753                     "Double checking the fixed image now...\n",
754                     check->leaks_fixed,
755                     check->corruptions_fixed);
756         }
757 
758         ret = collect_image_check(bs, check, filename, fmt, 0);
759 
760         check->leaks_fixed          = leaks_fixed;
761         check->corruptions_fixed    = corruptions_fixed;
762     }
763 
764     if (!ret) {
765         switch (output_format) {
766         case OFORMAT_HUMAN:
767             dump_human_image_check(check, quiet);
768             break;
769         case OFORMAT_JSON:
770             dump_json_image_check(check, quiet);
771             break;
772         }
773     }
774 
775     if (ret || check->check_errors) {
776         if (ret) {
777             error_report("Check failed: %s", strerror(-ret));
778         } else {
779             error_report("Check failed");
780         }
781         ret = 1;
782         goto fail;
783     }
784 
785     if (check->corruptions) {
786         ret = 2;
787     } else if (check->leaks) {
788         ret = 3;
789     } else {
790         ret = 0;
791     }
792 
793 fail:
794     qapi_free_ImageCheck(check);
795     blk_unref(blk);
796     return ret;
797 }
798 
/*
 * Context passed as the opaque argument to common_block_job_cb().
 */
typedef struct CommonBlockJobCBInfo {
    /* Not read by common_block_job_cb(); presumably the node the job works
     * on - set by callers outside this part of the file (TODO confirm). */
    BlockDriverState *bs;
    /* Where the callback stores the error when the job fails */
    Error **errp;
} CommonBlockJobCBInfo;
803 
804 static void common_block_job_cb(void *opaque, int ret)
805 {
806     CommonBlockJobCBInfo *cbi = opaque;
807 
808     if (ret < 0) {
809         error_setg_errno(cbi->errp, -ret, "Block job failed");
810     }
811 }
812 
813 static void run_block_job(BlockJob *job, Error **errp)
814 {
815     AioContext *aio_context = blk_get_aio_context(job->blk);
816 
817     /* FIXME In error cases, the job simply goes away and we access a dangling
818      * pointer below. */
819     aio_context_acquire(aio_context);
820     do {
821         aio_poll(aio_context, true);
822         qemu_progress_print(job->len ?
823                             ((float)job->offset / job->len * 100.f) : 0.0f, 0);
824     } while (!job->ready);
825 
826     block_job_complete_sync(job, errp);
827     aio_context_release(aio_context);
828 
829     /* A block job may finish instantaneously without publishing any progress,
830      * so just signal completion here */
831     qemu_progress_print(100.f, 0);
832 }
833 
834 static int img_commit(int argc, char **argv)
835 {
836     int c, ret, flags;
837     const char *filename, *fmt, *cache, *base;
838     BlockBackend *blk;
839     BlockDriverState *bs, *base_bs;
840     BlockJob *job;
841     bool progress = false, quiet = false, drop = false;
842     bool writethrough;
843     Error *local_err = NULL;
844     CommonBlockJobCBInfo cbi;
845     bool image_opts = false;
846     AioContext *aio_context;
847 
848     fmt = NULL;
849     cache = BDRV_DEFAULT_CACHE;
850     base = NULL;
851     for(;;) {
852         static const struct option long_options[] = {
853             {"help", no_argument, 0, 'h'},
854             {"object", required_argument, 0, OPTION_OBJECT},
855             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
856             {0, 0, 0, 0}
857         };
858         c = getopt_long(argc, argv, "f:ht:b:dpq",
859                         long_options, NULL);
860         if (c == -1) {
861             break;
862         }
863         switch(c) {
864         case '?':
865         case 'h':
866             help();
867             break;
868         case 'f':
869             fmt = optarg;
870             break;
871         case 't':
872             cache = optarg;
873             break;
874         case 'b':
875             base = optarg;
876             /* -b implies -d */
877             drop = true;
878             break;
879         case 'd':
880             drop = true;
881             break;
882         case 'p':
883             progress = true;
884             break;
885         case 'q':
886             quiet = true;
887             break;
888         case OPTION_OBJECT: {
889             QemuOpts *opts;
890             opts = qemu_opts_parse_noisily(&qemu_object_opts,
891                                            optarg, true);
892             if (!opts) {
893                 return 1;
894             }
895         }   break;
896         case OPTION_IMAGE_OPTS:
897             image_opts = true;
898             break;
899         }
900     }
901 
902     /* Progress is not shown in Quiet mode */
903     if (quiet) {
904         progress = false;
905     }
906 
907     if (optind != argc - 1) {
908         error_exit("Expecting one image file name");
909     }
910     filename = argv[optind++];
911 
912     if (qemu_opts_foreach(&qemu_object_opts,
913                           user_creatable_add_opts_foreach,
914                           NULL, NULL)) {
915         return 1;
916     }
917 
918     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
919     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
920     if (ret < 0) {
921         error_report("Invalid cache option: %s", cache);
922         return 1;
923     }
924 
925     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
926     if (!blk) {
927         return 1;
928     }
929     bs = blk_bs(blk);
930 
931     qemu_progress_init(progress, 1.f);
932     qemu_progress_print(0.f, 100);
933 
934     if (base) {
935         base_bs = bdrv_find_backing_image(bs, base);
936         if (!base_bs) {
937             error_setg(&local_err,
938                        "Did not find '%s' in the backing chain of '%s'",
939                        base, filename);
940             goto done;
941         }
942     } else {
943         /* This is different from QMP, which by default uses the deepest file in
944          * the backing chain (i.e., the very base); however, the traditional
945          * behavior of qemu-img commit is using the immediate backing file. */
946         base_bs = backing_bs(bs);
947         if (!base_bs) {
948             error_setg(&local_err, "Image does not have a backing file");
949             goto done;
950         }
951     }
952 
953     cbi = (CommonBlockJobCBInfo){
954         .errp = &local_err,
955         .bs   = bs,
956     };
957 
958     aio_context = bdrv_get_aio_context(bs);
959     aio_context_acquire(aio_context);
960     commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
961                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
962                         &cbi, &local_err, false);
963     aio_context_release(aio_context);
964     if (local_err) {
965         goto done;
966     }
967 
968     /* When the block job completes, the BlockBackend reference will point to
969      * the old backing file. In order to avoid that the top image is already
970      * deleted, so we can still empty it afterwards, increment the reference
971      * counter here preemptively. */
972     if (!drop) {
973         bdrv_ref(bs);
974     }
975 
976     job = block_job_get("commit");
977     run_block_job(job, &local_err);
978     if (local_err) {
979         goto unref_backing;
980     }
981 
982     if (!drop && bs->drv->bdrv_make_empty) {
983         ret = bs->drv->bdrv_make_empty(bs);
984         if (ret) {
985             error_setg_errno(&local_err, -ret, "Could not empty %s",
986                              filename);
987             goto unref_backing;
988         }
989     }
990 
991 unref_backing:
992     if (!drop) {
993         bdrv_unref(bs);
994     }
995 
996 done:
997     qemu_progress_end();
998 
999     blk_unref(blk);
1000 
1001     if (local_err) {
1002         error_report_err(local_err);
1003         return 1;
1004     }
1005 
1006     qprintf(quiet, "Image committed.\n");
1007     return 0;
1008 }
1009 
/*
 * Classify the first 512-byte sector of 'buf' and measure how long that
 * classification holds.
 *
 * Returns non-zero iff the first sector contains at least one non-NUL byte.
 *
 * 'pnum' receives the number of consecutive sectors, starting with the first
 * one, that share the first sector's zero/non-zero state. At most 'n' sectors
 * are examined; for n <= 0 the function stores 0 in 'pnum' and returns 0.
 */
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
{
    bool first_is_zero;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_is_zero = buffer_is_zero(buf, 512);

    /* Extend the run until a sector with the opposite state shows up */
    run = 1;
    while (run < n &&
           buffer_is_zero(buf + (size_t)run * 512, 512) == first_is_zero) {
        run++;
    }

    *pnum = run;
    return !first_is_zero;
}
1036 
1037 /*
1038  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1039  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1040  * breaking up write requests for only small sparse areas.
1041  */
1042 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1043     int min)
1044 {
1045     int ret;
1046     int num_checked, num_used;
1047 
1048     if (n < min) {
1049         min = n;
1050     }
1051 
1052     ret = is_allocated_sectors(buf, n, pnum);
1053     if (!ret) {
1054         return ret;
1055     }
1056 
1057     num_used = *pnum;
1058     buf += BDRV_SECTOR_SIZE * *pnum;
1059     n -= *pnum;
1060     num_checked = num_used;
1061 
1062     while (n > 0) {
1063         ret = is_allocated_sectors(buf, n, pnum);
1064 
1065         buf += BDRV_SECTOR_SIZE * *pnum;
1066         n -= *pnum;
1067         num_checked += *pnum;
1068         if (ret) {
1069             num_used = num_checked;
1070         } else if (*pnum >= min) {
1071             break;
1072         }
1073     }
1074 
1075     *pnum = num_used;
1076     return 1;
1077 }
1078 
/*
 * Sector-wise comparison of two buffers (512-byte sectors).
 *
 * Returns 0 if the first sector is identical in both buffers and non-zero
 * if it differs.
 *
 * 'pnum' receives the number of consecutive sectors, starting with the first
 * one, that yield the same comparison result as the first sector. At most
 * 'n' sectors are examined; for n <= 0 'pnum' is set to 0 and 0 is returned.
 */
static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
    int *pnum)
{
    bool first_differs;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_differs = memcmp(buf1, buf2, 512) != 0;

    run = 1;
    while (run < n) {
        size_t off = (size_t)run * 512;
        bool differs = memcmp(buf1 + off, buf2 + off, 512) != 0;

        if (differs != first_differs) {
            break;
        }
        run++;
    }

    *pnum = run;
    return first_differs;
}
1110 
/* Size in bytes (2 MiB) of the I/O buffers used when comparing images; it
 * also caps how many sectors sectors_to_process() hands out per step. */
#define IO_BUF_SIZE (2 * 1024 * 1024)
1112 
1113 static int64_t sectors_to_bytes(int64_t sectors)
1114 {
1115     return sectors << BDRV_SECTOR_BITS;
1116 }
1117 
1118 static int64_t sectors_to_process(int64_t total, int64_t from)
1119 {
1120     return MIN(total - from, IO_BUF_SIZE >> BDRV_SECTOR_BITS);
1121 }
1122 
1123 /*
1124  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1125  *
1126  * Returns 0 in case sectors are filled with 0, 1 if sectors contain non-zero
1127  * data and negative value on error.
1128  *
1129  * @param blk:  BlockBackend for the image
1130  * @param sect_num: Number of first sector to check
1131  * @param sect_count: Number of sectors to check
1132  * @param filename: Name of disk file we are checking (logging purpose)
1133  * @param buffer: Allocated buffer for storing read data
1134  * @param quiet: Flag for quiet mode
1135  */
1136 static int check_empty_sectors(BlockBackend *blk, int64_t sect_num,
1137                                int sect_count, const char *filename,
1138                                uint8_t *buffer, bool quiet)
1139 {
1140     int pnum, ret = 0;
1141     ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer,
1142                     sect_count << BDRV_SECTOR_BITS);
1143     if (ret < 0) {
1144         error_report("Error while reading offset %" PRId64 " of %s: %s",
1145                      sectors_to_bytes(sect_num), filename, strerror(-ret));
1146         return ret;
1147     }
1148     ret = is_allocated_sectors(buffer, sect_count, &pnum);
1149     if (ret || pnum != sect_count) {
1150         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1151                 sectors_to_bytes(ret ? sect_num : sect_num + pnum));
1152         return 1;
1153     }
1154 
1155     return 0;
1156 }
1157 
1158 /*
1159  * Compares two images. Exit codes:
1160  *
1161  * 0 - Images are identical
1162  * 1 - Images differ
1163  * >1 - Error occurred
1164  */
1165 static int img_compare(int argc, char **argv)
1166 {
1167     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1168     BlockBackend *blk1, *blk2;
1169     BlockDriverState *bs1, *bs2;
1170     int64_t total_sectors1, total_sectors2;
1171     uint8_t *buf1 = NULL, *buf2 = NULL;
1172     int pnum1, pnum2;
1173     int allocated1, allocated2;
1174     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1175     bool progress = false, quiet = false, strict = false;
1176     int flags;
1177     bool writethrough;
1178     int64_t total_sectors;
1179     int64_t sector_num = 0;
1180     int64_t nb_sectors;
1181     int c, pnum;
1182     uint64_t progress_base;
1183     bool image_opts = false;
1184 
1185     cache = BDRV_DEFAULT_CACHE;
1186     for (;;) {
1187         static const struct option long_options[] = {
1188             {"help", no_argument, 0, 'h'},
1189             {"object", required_argument, 0, OPTION_OBJECT},
1190             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1191             {0, 0, 0, 0}
1192         };
1193         c = getopt_long(argc, argv, "hf:F:T:pqs",
1194                         long_options, NULL);
1195         if (c == -1) {
1196             break;
1197         }
1198         switch (c) {
1199         case '?':
1200         case 'h':
1201             help();
1202             break;
1203         case 'f':
1204             fmt1 = optarg;
1205             break;
1206         case 'F':
1207             fmt2 = optarg;
1208             break;
1209         case 'T':
1210             cache = optarg;
1211             break;
1212         case 'p':
1213             progress = true;
1214             break;
1215         case 'q':
1216             quiet = true;
1217             break;
1218         case 's':
1219             strict = true;
1220             break;
1221         case OPTION_OBJECT: {
1222             QemuOpts *opts;
1223             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1224                                            optarg, true);
1225             if (!opts) {
1226                 ret = 2;
1227                 goto out4;
1228             }
1229         }   break;
1230         case OPTION_IMAGE_OPTS:
1231             image_opts = true;
1232             break;
1233         }
1234     }
1235 
1236     /* Progress is not shown in Quiet mode */
1237     if (quiet) {
1238         progress = false;
1239     }
1240 
1241 
1242     if (optind != argc - 2) {
1243         error_exit("Expecting two image file names");
1244     }
1245     filename1 = argv[optind++];
1246     filename2 = argv[optind++];
1247 
1248     if (qemu_opts_foreach(&qemu_object_opts,
1249                           user_creatable_add_opts_foreach,
1250                           NULL, NULL)) {
1251         ret = 2;
1252         goto out4;
1253     }
1254 
1255     /* Initialize before goto out */
1256     qemu_progress_init(progress, 2.0);
1257 
1258     flags = 0;
1259     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1260     if (ret < 0) {
1261         error_report("Invalid source cache option: %s", cache);
1262         ret = 2;
1263         goto out3;
1264     }
1265 
1266     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet);
1267     if (!blk1) {
1268         ret = 2;
1269         goto out3;
1270     }
1271 
1272     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet);
1273     if (!blk2) {
1274         ret = 2;
1275         goto out2;
1276     }
1277     bs1 = blk_bs(blk1);
1278     bs2 = blk_bs(blk2);
1279 
1280     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1281     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1282     total_sectors1 = blk_nb_sectors(blk1);
1283     if (total_sectors1 < 0) {
1284         error_report("Can't get size of %s: %s",
1285                      filename1, strerror(-total_sectors1));
1286         ret = 4;
1287         goto out;
1288     }
1289     total_sectors2 = blk_nb_sectors(blk2);
1290     if (total_sectors2 < 0) {
1291         error_report("Can't get size of %s: %s",
1292                      filename2, strerror(-total_sectors2));
1293         ret = 4;
1294         goto out;
1295     }
1296     total_sectors = MIN(total_sectors1, total_sectors2);
1297     progress_base = MAX(total_sectors1, total_sectors2);
1298 
1299     qemu_progress_print(0, 100);
1300 
1301     if (strict && total_sectors1 != total_sectors2) {
1302         ret = 1;
1303         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1304         goto out;
1305     }
1306 
1307     for (;;) {
1308         int64_t status1, status2;
1309         BlockDriverState *file;
1310 
1311         nb_sectors = sectors_to_process(total_sectors, sector_num);
1312         if (nb_sectors <= 0) {
1313             break;
1314         }
1315         status1 = bdrv_get_block_status_above(bs1, NULL, sector_num,
1316                                               total_sectors1 - sector_num,
1317                                               &pnum1, &file);
1318         if (status1 < 0) {
1319             ret = 3;
1320             error_report("Sector allocation test failed for %s", filename1);
1321             goto out;
1322         }
1323         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1324 
1325         status2 = bdrv_get_block_status_above(bs2, NULL, sector_num,
1326                                               total_sectors2 - sector_num,
1327                                               &pnum2, &file);
1328         if (status2 < 0) {
1329             ret = 3;
1330             error_report("Sector allocation test failed for %s", filename2);
1331             goto out;
1332         }
1333         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1334         if (pnum1) {
1335             nb_sectors = MIN(nb_sectors, pnum1);
1336         }
1337         if (pnum2) {
1338             nb_sectors = MIN(nb_sectors, pnum2);
1339         }
1340 
1341         if (strict) {
1342             if ((status1 & ~BDRV_BLOCK_OFFSET_MASK) !=
1343                 (status2 & ~BDRV_BLOCK_OFFSET_MASK)) {
1344                 ret = 1;
1345                 qprintf(quiet, "Strict mode: Offset %" PRId64
1346                         " block status mismatch!\n",
1347                         sectors_to_bytes(sector_num));
1348                 goto out;
1349             }
1350         }
1351         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1352             nb_sectors = MIN(pnum1, pnum2);
1353         } else if (allocated1 == allocated2) {
1354             if (allocated1) {
1355                 ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1,
1356                                 nb_sectors << BDRV_SECTOR_BITS);
1357                 if (ret < 0) {
1358                     error_report("Error while reading offset %" PRId64 " of %s:"
1359                                  " %s", sectors_to_bytes(sector_num), filename1,
1360                                  strerror(-ret));
1361                     ret = 4;
1362                     goto out;
1363                 }
1364                 ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2,
1365                                 nb_sectors << BDRV_SECTOR_BITS);
1366                 if (ret < 0) {
1367                     error_report("Error while reading offset %" PRId64
1368                                  " of %s: %s", sectors_to_bytes(sector_num),
1369                                  filename2, strerror(-ret));
1370                     ret = 4;
1371                     goto out;
1372                 }
1373                 ret = compare_sectors(buf1, buf2, nb_sectors, &pnum);
1374                 if (ret || pnum != nb_sectors) {
1375                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1376                             sectors_to_bytes(
1377                                 ret ? sector_num : sector_num + pnum));
1378                     ret = 1;
1379                     goto out;
1380                 }
1381             }
1382         } else {
1383 
1384             if (allocated1) {
1385                 ret = check_empty_sectors(blk1, sector_num, nb_sectors,
1386                                           filename1, buf1, quiet);
1387             } else {
1388                 ret = check_empty_sectors(blk2, sector_num, nb_sectors,
1389                                           filename2, buf1, quiet);
1390             }
1391             if (ret) {
1392                 if (ret < 0) {
1393                     error_report("Error while reading offset %" PRId64 ": %s",
1394                                  sectors_to_bytes(sector_num), strerror(-ret));
1395                     ret = 4;
1396                 }
1397                 goto out;
1398             }
1399         }
1400         sector_num += nb_sectors;
1401         qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1402     }
1403 
1404     if (total_sectors1 != total_sectors2) {
1405         BlockBackend *blk_over;
1406         int64_t total_sectors_over;
1407         const char *filename_over;
1408 
1409         qprintf(quiet, "Warning: Image size mismatch!\n");
1410         if (total_sectors1 > total_sectors2) {
1411             total_sectors_over = total_sectors1;
1412             blk_over = blk1;
1413             filename_over = filename1;
1414         } else {
1415             total_sectors_over = total_sectors2;
1416             blk_over = blk2;
1417             filename_over = filename2;
1418         }
1419 
1420         for (;;) {
1421             nb_sectors = sectors_to_process(total_sectors_over, sector_num);
1422             if (nb_sectors <= 0) {
1423                 break;
1424             }
1425             ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num,
1426                                           nb_sectors, &pnum);
1427             if (ret < 0) {
1428                 ret = 3;
1429                 error_report("Sector allocation test failed for %s",
1430                              filename_over);
1431                 goto out;
1432 
1433             }
1434             nb_sectors = pnum;
1435             if (ret) {
1436                 ret = check_empty_sectors(blk_over, sector_num, nb_sectors,
1437                                           filename_over, buf1, quiet);
1438                 if (ret) {
1439                     if (ret < 0) {
1440                         error_report("Error while reading offset %" PRId64
1441                                      " of %s: %s", sectors_to_bytes(sector_num),
1442                                      filename_over, strerror(-ret));
1443                         ret = 4;
1444                     }
1445                     goto out;
1446                 }
1447             }
1448             sector_num += nb_sectors;
1449             qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1450         }
1451     }
1452 
1453     qprintf(quiet, "Images are identical.\n");
1454     ret = 0;
1455 
1456 out:
1457     qemu_vfree(buf1);
1458     qemu_vfree(buf2);
1459     blk_unref(blk2);
1460 out2:
1461     blk_unref(blk1);
1462 out3:
1463     qemu_progress_end();
1464 out4:
1465     return ret;
1466 }
1467 
/*
 * Classification of a source range during conversion, as computed by
 * convert_iteration_sectors() and acted upon by convert_co_write().
 */
enum ImgConvertBlockStatus {
    /* Range is read from the source and written to the target */
    BLK_DATA,
    /* Range reads as zeroes; no source read is needed */
    BLK_ZERO,
    /* Range is left unallocated in the target so that the target's backing
     * file stays visible there (only used with a target backing file) */
    BLK_BACKING_FILE,
};
1473 
/* Capacity of the per-coroutine arrays in ImgConvertState */
#define MAX_COROUTINES 16

/* State shared by all coroutines of one image conversion */
typedef struct ImgConvertState {
    BlockBackend **src;             /* source images, treated as concatenated */
    int64_t *src_sectors;           /* size in sectors of each source image */
    int src_num;                    /* number of entries in src/src_sectors */
    int64_t total_sectors;          /* end of the range to convert, in sectors */
    int64_t allocated_sectors;      /* denominator for the progress display */
    int64_t allocated_done;         /* sectors counted towards progress so far */
    int64_t sector_num;             /* next sector to hand out to a coroutine */
    int64_t wr_offs;                /* sector at which the next in-order write
                                     * has to start */
    enum ImgConvertBlockStatus status;  /* status of the range last examined by
                                         * convert_iteration_sectors() */
    int64_t sector_next_status;     /* first sector not covered by 'status' */
    BlockBackend *target;           /* image being written */
    bool has_zero_init;             /* if set, zero ranges are not written to
                                     * the target */
    bool compressed;                /* write compressed, whole clusters only */
    bool target_has_backing;        /* target has a backing file */
    bool wr_in_order;               /* writes must be issued in sector order */
    int min_sparse;                 /* zero runs shorter than this many sectors
                                     * are written as data; 0 writes everything */
    size_t cluster_sectors;         /* target cluster size in sectors (used for
                                     * compressed writes) */
    size_t buf_sectors;             /* size in sectors of each copy buffer */
    int num_coroutines;             /* number of valid entries in co[] */
    int running_coroutines;         /* coroutines that have not finished yet */
    Coroutine *co[MAX_COROUTINES];  /* worker coroutines; NULL once finished */
    int64_t wait_sector_num[MAX_COROUTINES];    /* sector a worker waits for in
                                                 * wr_in_order mode, or -1 */
    CoMutex lock;                   /* serializes handing out sector ranges */
    int ret;                        /* -EINPROGRESS while running, then 0 or a
                                     * negative errno */
} ImgConvertState;
1502 
1503 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1504                                 int *src_cur, int64_t *src_cur_offset)
1505 {
1506     *src_cur = 0;
1507     *src_cur_offset = 0;
1508     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1509         *src_cur_offset += s->src_sectors[*src_cur];
1510         (*src_cur)++;
1511         assert(*src_cur < s->src_num);
1512     }
1513 }
1514 
1515 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1516 {
1517     int64_t ret, src_cur_offset;
1518     int n, src_cur;
1519 
1520     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1521 
1522     assert(s->total_sectors > sector_num);
1523     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1524 
1525     if (s->sector_next_status <= sector_num) {
1526         BlockDriverState *file;
1527         ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
1528                                     sector_num - src_cur_offset,
1529                                     n, &n, &file);
1530         if (ret < 0) {
1531             return ret;
1532         }
1533 
1534         if (ret & BDRV_BLOCK_ZERO) {
1535             s->status = BLK_ZERO;
1536         } else if (ret & BDRV_BLOCK_DATA) {
1537             s->status = BLK_DATA;
1538         } else if (!s->target_has_backing) {
1539             /* Without a target backing file we must copy over the contents of
1540              * the backing file as well. */
1541             /* Check block status of the backing file chain to avoid
1542              * needlessly reading zeroes and limiting the iteration to the
1543              * buffer size */
1544             ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
1545                                               sector_num - src_cur_offset,
1546                                               n, &n, &file);
1547             if (ret < 0) {
1548                 return ret;
1549             }
1550 
1551             if (ret & BDRV_BLOCK_ZERO) {
1552                 s->status = BLK_ZERO;
1553             } else {
1554                 s->status = BLK_DATA;
1555             }
1556         } else {
1557             s->status = BLK_BACKING_FILE;
1558         }
1559 
1560         s->sector_next_status = sector_num + n;
1561     }
1562 
1563     n = MIN(n, s->sector_next_status - sector_num);
1564     if (s->status == BLK_DATA) {
1565         n = MIN(n, s->buf_sectors);
1566     }
1567 
1568     /* We need to write complete clusters for compressed images, so if an
1569      * unallocated area is shorter than that, we must consider the whole
1570      * cluster allocated. */
1571     if (s->compressed) {
1572         if (n < s->cluster_sectors) {
1573             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1574             s->status = BLK_DATA;
1575         } else {
1576             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1577         }
1578     }
1579 
1580     return n;
1581 }
1582 
1583 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1584                                         int nb_sectors, uint8_t *buf)
1585 {
1586     int n, ret;
1587     QEMUIOVector qiov;
1588     struct iovec iov;
1589 
1590     assert(nb_sectors <= s->buf_sectors);
1591     while (nb_sectors > 0) {
1592         BlockBackend *blk;
1593         int src_cur;
1594         int64_t bs_sectors, src_cur_offset;
1595 
1596         /* In the case of compression with multiple source files, we can get a
1597          * nb_sectors that spreads into the next part. So we must be able to
1598          * read across multiple BDSes for one convert_read() call. */
1599         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1600         blk = s->src[src_cur];
1601         bs_sectors = s->src_sectors[src_cur];
1602 
1603         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1604         iov.iov_base = buf;
1605         iov.iov_len = n << BDRV_SECTOR_BITS;
1606         qemu_iovec_init_external(&qiov, &iov, 1);
1607 
1608         ret = blk_co_preadv(
1609                 blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
1610                 n << BDRV_SECTOR_BITS, &qiov, 0);
1611         if (ret < 0) {
1612             return ret;
1613         }
1614 
1615         sector_num += n;
1616         nb_sectors -= n;
1617         buf += n * BDRV_SECTOR_SIZE;
1618     }
1619 
1620     return 0;
1621 }
1622 
1623 
/*
 * Write 'nb_sectors' sectors starting at 'sector_num' to the target,
 * according to 'status':
 *
 * - BLK_BACKING_FILE: nothing is written.
 * - BLK_DATA: 'buf' holds the data. For compressed targets the whole range is
 *   written in one compressed request (or skipped if it is all zeroes and the
 *   target may stay sparse). Otherwise the buffer is split into runs of data
 *   and zeroes; data runs are written, zero runs are handled like BLK_ZERO.
 * - BLK_ZERO: zeroes are written unless the target already reads as zeroes
 *   (s->has_zero_init).
 *
 * Returns 0 on success or the negative value returned by the failing write.
 */
static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
                                         int nb_sectors, uint8_t *buf,
                                         enum ImgConvertBlockStatus status)
{
    int ret;
    QEMUIOVector qiov;
    struct iovec iov;

    while (nb_sectors > 0) {
        /* Sectors handled in this iteration; may be reduced below to the
         * length of the leading data or zero run in the buffer. */
        int n = nb_sectors;
        switch (status) {
        case BLK_BACKING_FILE:
            /* If we have a backing file, leave clusters unallocated that are
             * unallocated in the source image, so that the backing file is
             * visible at the respective offset. */
            assert(s->target_has_backing);
            break;

        case BLK_DATA:
            /* We must always write compressed clusters as a whole, so don't
             * try to find zeroed parts in the buffer. We can only save the
             * write if the buffer is completely zeroed and we're allowed to
             * keep the target sparse. */
            if (s->compressed) {
                if (s->has_zero_init && s->min_sparse &&
                    buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
                {
                    assert(!s->target_has_backing);
                    break;
                }

                iov.iov_base = buf;
                iov.iov_len = n << BDRV_SECTOR_BITS;
                qemu_iovec_init_external(&qiov, &iov, 1);

                ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
                                     n << BDRV_SECTOR_BITS, &qiov,
                                     BDRV_REQ_WRITE_COMPRESSED);
                if (ret < 0) {
                    return ret;
                }
                break;
            }

            /* If there is real non-zero data or we're told to keep the target
             * fully allocated (-S 0), we must write it. Otherwise we can treat
             * it as zero sectors. */
            /* Note: is_allocated_sectors_min() shrinks n to the length of the
             * leading run, whether it is a data run (written here) or a zero
             * run (handled by the fall-through below). */
            if (!s->min_sparse ||
                is_allocated_sectors_min(buf, n, &n, s->min_sparse))
            {
                iov.iov_base = buf;
                iov.iov_len = n << BDRV_SECTOR_BITS;
                qemu_iovec_init_external(&qiov, &iov, 1);

                ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
                                     n << BDRV_SECTOR_BITS, &qiov, 0);
                if (ret < 0) {
                    return ret;
                }
                break;
            }
            /* fall-through */

        case BLK_ZERO:
            /* Nothing to do if the target already reads as zeroes */
            if (s->has_zero_init) {
                break;
            }
            ret = blk_co_pwrite_zeroes(s->target,
                                       sector_num << BDRV_SECTOR_BITS,
                                       n << BDRV_SECTOR_BITS, 0);
            if (ret < 0) {
                return ret;
            }
            break;
        }

        /* Advance past the n sectors handled in this iteration */
        sector_num += n;
        nb_sectors -= n;
        buf += n * BDRV_SECTOR_SIZE;
    }

    return 0;
}
1707 
/*
 * Body of one conversion worker coroutine; 'opaque' is the shared
 * ImgConvertState.
 *
 * Each worker repeatedly claims the next range of sectors under s->lock,
 * reads it if it contains data, and writes it to the target. With
 * s->wr_in_order set, a worker yields until all preceding sectors have been
 * written and, after its own write, re-enters the worker waiting for the
 * sector that follows. The first error is stored in s->ret; the last worker
 * to finish turns a still pending -EINPROGRESS into 0.
 */
static void coroutine_fn convert_co_do_copy(void *opaque)
{
    ImgConvertState *s = opaque;
    uint8_t *buf = NULL;
    int ret, i;
    int index = -1;

    /* Find this coroutine's slot in s->co[] */
    for (i = 0; i < s->num_coroutines; i++) {
        if (s->co[i] == qemu_coroutine_self()) {
            index = i;
            break;
        }
    }
    assert(index >= 0);

    s->running_coroutines++;
    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);

    while (1) {
        int n;
        int64_t sector_num;
        enum ImgConvertBlockStatus status;

        qemu_co_mutex_lock(&s->lock);
        /* Stop once another worker has failed or all sectors are handed out */
        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
            qemu_co_mutex_unlock(&s->lock);
            goto out;
        }
        n = convert_iteration_sectors(s, s->sector_num);
        if (n < 0) {
            qemu_co_mutex_unlock(&s->lock);
            s->ret = n;
            goto out;
        }
        /* save current sector and allocation status to local variables */
        sector_num = s->sector_num;
        status = s->status;
        /* Zero ranges that get written out as data below have to fit into
         * the buffer as well */
        if (!s->min_sparse && s->status == BLK_ZERO) {
            n = MIN(n, s->buf_sectors);
        }
        /* increment global sector counter so that other coroutines can
         * already continue reading beyond this request */
        s->sector_num += n;
        qemu_co_mutex_unlock(&s->lock);

        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
            s->allocated_done += n;
            qemu_progress_print(100.0 * s->allocated_done /
                                        s->allocated_sectors, 0);
        }

        if (status == BLK_DATA) {
            ret = convert_co_read(s, sector_num, n, buf);
            if (ret < 0) {
                error_report("error while reading sector %" PRId64
                             ": %s", sector_num, strerror(-ret));
                s->ret = ret;
                goto out;
            }
        } else if (!s->min_sparse && status == BLK_ZERO) {
            /* min_sparse == 0: zeroes are written explicitly, so turn the
             * range into a data range backed by a zeroed buffer */
            status = BLK_DATA;
            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
        }

        if (s->wr_in_order) {
            /* keep writes in order */
            /* NOTE(review): a worker that leaves through an error path does
             * not re-enter workers parked here - presumably the caller takes
             * care of them; confirm. */
            while (s->wr_offs != sector_num) {
                if (s->ret != -EINPROGRESS) {
                    goto out;
                }
                s->wait_sector_num[index] = sector_num;
                qemu_coroutine_yield();
            }
            s->wait_sector_num[index] = -1;
        }

        ret = convert_co_write(s, sector_num, n, buf, status);
        if (ret < 0) {
            error_report("error while writing sector %" PRId64
                         ": %s", sector_num, strerror(-ret));
            s->ret = ret;
            goto out;
        }

        if (s->wr_in_order) {
            /* reenter the coroutine that might have waited
             * for this write to complete */
            s->wr_offs = sector_num + n;
            for (i = 0; i < s->num_coroutines; i++) {
                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
                    /*
                     * A -> B -> A cannot occur because A has
                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
                     * B will never enter A during this time window.
                     */
                    qemu_coroutine_enter(s->co[i]);
                    break;
                }
            }
        }
    }

out:
    qemu_vfree(buf);
    /* Mark the slot as finished so that no other worker re-enters it */
    s->co[index] = NULL;
    s->running_coroutines--;
    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
        /* the convert job finished successfully */
        s->ret = 0;
    }
}
1819 
1820 static int convert_do_copy(ImgConvertState *s)
1821 {
1822     int ret, i, n;
1823     int64_t sector_num = 0;
1824 
1825     /* Check whether we have zero initialisation or can get it efficiently */
1826     s->has_zero_init = s->min_sparse && !s->target_has_backing
1827                      ? bdrv_has_zero_init(blk_bs(s->target))
1828                      : false;
1829 
1830     if (!s->has_zero_init && !s->target_has_backing &&
1831         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1832     {
1833         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP);
1834         if (ret == 0) {
1835             s->has_zero_init = true;
1836         }
1837     }
1838 
1839     /* Allocate buffer for copied data. For compressed images, only one cluster
1840      * can be copied at a time. */
1841     if (s->compressed) {
1842         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
1843             error_report("invalid cluster size");
1844             return -EINVAL;
1845         }
1846         s->buf_sectors = s->cluster_sectors;
1847     }
1848 
1849     while (sector_num < s->total_sectors) {
1850         n = convert_iteration_sectors(s, sector_num);
1851         if (n < 0) {
1852             return n;
1853         }
1854         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
1855         {
1856             s->allocated_sectors += n;
1857         }
1858         sector_num += n;
1859     }
1860 
1861     /* Do the copy */
1862     s->sector_next_status = 0;
1863     s->ret = -EINPROGRESS;
1864 
1865     qemu_co_mutex_init(&s->lock);
1866     for (i = 0; i < s->num_coroutines; i++) {
1867         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
1868         s->wait_sector_num[i] = -1;
1869         qemu_coroutine_enter(s->co[i]);
1870     }
1871 
1872     while (s->ret == -EINPROGRESS) {
1873         main_loop_wait(false);
1874     }
1875 
1876     if (s->compressed && !s->ret) {
1877         /* signal EOF to align */
1878         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
1879         if (ret < 0) {
1880             return ret;
1881         }
1882     }
1883 
1884     return s->ret;
1885 }
1886 
1887 static int img_convert(int argc, char **argv)
1888 {
1889     int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
1890     int64_t ret = 0;
1891     int progress = 0, flags, src_flags;
1892     bool writethrough, src_writethrough;
1893     const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
1894     BlockDriver *drv, *proto_drv;
1895     BlockBackend **blk = NULL, *out_blk = NULL;
1896     BlockDriverState **bs = NULL, *out_bs = NULL;
1897     int64_t total_sectors;
1898     int64_t *bs_sectors = NULL;
1899     size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
1900     BlockDriverInfo bdi;
1901     QemuOpts *opts = NULL;
1902     QemuOptsList *create_opts = NULL;
1903     const char *out_baseimg_param;
1904     char *options = NULL;
1905     const char *snapshot_name = NULL;
1906     int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
1907     bool quiet = false;
1908     Error *local_err = NULL;
1909     QemuOpts *sn_opts = NULL;
1910     ImgConvertState state;
1911     bool image_opts = false;
1912     bool wr_in_order = true;
1913     long num_coroutines = 8;
1914 
1915     fmt = NULL;
1916     out_fmt = "raw";
1917     cache = "unsafe";
1918     src_cache = BDRV_DEFAULT_CACHE;
1919     out_baseimg = NULL;
1920     compress = 0;
1921     skip_create = 0;
1922     for(;;) {
1923         static const struct option long_options[] = {
1924             {"help", no_argument, 0, 'h'},
1925             {"object", required_argument, 0, OPTION_OBJECT},
1926             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1927             {0, 0, 0, 0}
1928         };
1929         c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
1930                         long_options, NULL);
1931         if (c == -1) {
1932             break;
1933         }
1934         switch(c) {
1935         case '?':
1936         case 'h':
1937             help();
1938             break;
1939         case 'f':
1940             fmt = optarg;
1941             break;
1942         case 'O':
1943             out_fmt = optarg;
1944             break;
1945         case 'B':
1946             out_baseimg = optarg;
1947             break;
1948         case 'c':
1949             compress = 1;
1950             break;
1951         case 'e':
1952             error_report("option -e is deprecated, please use \'-o "
1953                   "encryption\' instead!");
1954             ret = -1;
1955             goto fail_getopt;
1956         case '6':
1957             error_report("option -6 is deprecated, please use \'-o "
1958                   "compat6\' instead!");
1959             ret = -1;
1960             goto fail_getopt;
1961         case 'o':
1962             if (!is_valid_option_list(optarg)) {
1963                 error_report("Invalid option list: %s", optarg);
1964                 ret = -1;
1965                 goto fail_getopt;
1966             }
1967             if (!options) {
1968                 options = g_strdup(optarg);
1969             } else {
1970                 char *old_options = options;
1971                 options = g_strdup_printf("%s,%s", options, optarg);
1972                 g_free(old_options);
1973             }
1974             break;
1975         case 's':
1976             snapshot_name = optarg;
1977             break;
1978         case 'l':
1979             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
1980                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
1981                                                   optarg, false);
1982                 if (!sn_opts) {
1983                     error_report("Failed in parsing snapshot param '%s'",
1984                                  optarg);
1985                     ret = -1;
1986                     goto fail_getopt;
1987                 }
1988             } else {
1989                 snapshot_name = optarg;
1990             }
1991             break;
1992         case 'S':
1993         {
1994             int64_t sval;
1995 
1996             sval = cvtnum(optarg);
1997             if (sval < 0) {
1998                 error_report("Invalid minimum zero buffer size for sparse output specified");
1999                 ret = -1;
2000                 goto fail_getopt;
2001             }
2002 
2003             min_sparse = sval / BDRV_SECTOR_SIZE;
2004             break;
2005         }
2006         case 'p':
2007             progress = 1;
2008             break;
2009         case 't':
2010             cache = optarg;
2011             break;
2012         case 'T':
2013             src_cache = optarg;
2014             break;
2015         case 'q':
2016             quiet = true;
2017             break;
2018         case 'n':
2019             skip_create = 1;
2020             break;
2021         case 'm':
2022             if (qemu_strtol(optarg, NULL, 0, &num_coroutines) ||
2023                 num_coroutines < 1 || num_coroutines > MAX_COROUTINES) {
2024                 error_report("Invalid number of coroutines. Allowed number of"
2025                              " coroutines is between 1 and %d", MAX_COROUTINES);
2026                 ret = -1;
2027                 goto fail_getopt;
2028             }
2029             break;
2030         case 'W':
2031             wr_in_order = false;
2032             break;
2033         case OPTION_OBJECT:
2034             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2035                                            optarg, true);
2036             if (!opts) {
2037                 goto fail_getopt;
2038             }
2039             break;
2040         case OPTION_IMAGE_OPTS:
2041             image_opts = true;
2042             break;
2043         }
2044     }
2045 
2046     if (qemu_opts_foreach(&qemu_object_opts,
2047                           user_creatable_add_opts_foreach,
2048                           NULL, NULL)) {
2049         goto fail_getopt;
2050     }
2051 
2052     if (!wr_in_order && compress) {
2053         error_report("Out of order write and compress are mutually exclusive");
2054         ret = -1;
2055         goto fail_getopt;
2056     }
2057 
2058     /* Initialize before goto out */
2059     if (quiet) {
2060         progress = 0;
2061     }
2062     qemu_progress_init(progress, 1.0);
2063 
2064     bs_n = argc - optind - 1;
2065     out_filename = bs_n >= 1 ? argv[argc - 1] : NULL;
2066 
2067     if (options && has_help_option(options)) {
2068         ret = print_block_option_help(out_filename, out_fmt);
2069         goto out;
2070     }
2071 
2072     if (bs_n < 1) {
2073         error_exit("Must specify image file name");
2074     }
2075 
2076 
2077     if (bs_n > 1 && out_baseimg) {
2078         error_report("-B makes no sense when concatenating multiple input "
2079                      "images");
2080         ret = -1;
2081         goto out;
2082     }
2083 
2084     src_flags = 0;
2085     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2086     if (ret < 0) {
2087         error_report("Invalid source cache option: %s", src_cache);
2088         goto out;
2089     }
2090 
2091     qemu_progress_print(0, 100);
2092 
2093     blk = g_new0(BlockBackend *, bs_n);
2094     bs = g_new0(BlockDriverState *, bs_n);
2095     bs_sectors = g_new(int64_t, bs_n);
2096 
2097     total_sectors = 0;
2098     for (bs_i = 0; bs_i < bs_n; bs_i++) {
2099         blk[bs_i] = img_open(image_opts, argv[optind + bs_i],
2100                              fmt, src_flags, src_writethrough, quiet);
2101         if (!blk[bs_i]) {
2102             ret = -1;
2103             goto out;
2104         }
2105         bs[bs_i] = blk_bs(blk[bs_i]);
2106         bs_sectors[bs_i] = blk_nb_sectors(blk[bs_i]);
2107         if (bs_sectors[bs_i] < 0) {
2108             error_report("Could not get size of %s: %s",
2109                          argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
2110             ret = -1;
2111             goto out;
2112         }
2113         total_sectors += bs_sectors[bs_i];
2114     }
2115 
2116     if (sn_opts) {
2117         bdrv_snapshot_load_tmp(bs[0],
2118                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2119                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2120                                &local_err);
2121     } else if (snapshot_name != NULL) {
2122         if (bs_n > 1) {
2123             error_report("No support for concatenating multiple snapshot");
2124             ret = -1;
2125             goto out;
2126         }
2127 
2128         bdrv_snapshot_load_tmp_by_id_or_name(bs[0], snapshot_name, &local_err);
2129     }
2130     if (local_err) {
2131         error_reportf_err(local_err, "Failed to load snapshot: ");
2132         ret = -1;
2133         goto out;
2134     }
2135 
2136     /* Find driver and parse its options */
2137     drv = bdrv_find_format(out_fmt);
2138     if (!drv) {
2139         error_report("Unknown file format '%s'", out_fmt);
2140         ret = -1;
2141         goto out;
2142     }
2143 
2144     proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2145     if (!proto_drv) {
2146         error_report_err(local_err);
2147         ret = -1;
2148         goto out;
2149     }
2150 
2151     if (!skip_create) {
2152         if (!drv->create_opts) {
2153             error_report("Format driver '%s' does not support image creation",
2154                          drv->format_name);
2155             ret = -1;
2156             goto out;
2157         }
2158 
2159         if (!proto_drv->create_opts) {
2160             error_report("Protocol driver '%s' does not support image creation",
2161                          proto_drv->format_name);
2162             ret = -1;
2163             goto out;
2164         }
2165 
2166         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2167         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2168 
2169         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2170         if (options) {
2171             qemu_opts_do_parse(opts, options, NULL, &local_err);
2172             if (local_err) {
2173                 error_report_err(local_err);
2174                 ret = -1;
2175                 goto out;
2176             }
2177         }
2178 
2179         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_sectors * 512,
2180                             &error_abort);
2181         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2182         if (ret < 0) {
2183             goto out;
2184         }
2185     }
2186 
2187     /* Get backing file name if -o backing_file was used */
2188     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2189     if (out_baseimg_param) {
2190         out_baseimg = out_baseimg_param;
2191     }
2192 
2193     /* Check if compression is supported */
2194     if (compress) {
2195         bool encryption =
2196             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2197         const char *preallocation =
2198             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2199 
2200         if (!drv->bdrv_co_pwritev_compressed) {
2201             error_report("Compression not supported for this file format");
2202             ret = -1;
2203             goto out;
2204         }
2205 
2206         if (encryption) {
2207             error_report("Compression and encryption not supported at "
2208                          "the same time");
2209             ret = -1;
2210             goto out;
2211         }
2212 
2213         if (preallocation
2214             && strcmp(preallocation, "off"))
2215         {
2216             error_report("Compression and preallocation not supported at "
2217                          "the same time");
2218             ret = -1;
2219             goto out;
2220         }
2221     }
2222 
2223     if (!skip_create) {
2224         /* Create the new image */
2225         ret = bdrv_create(drv, out_filename, opts, &local_err);
2226         if (ret < 0) {
2227             error_reportf_err(local_err, "%s: error while converting %s: ",
2228                               out_filename, out_fmt);
2229             goto out;
2230         }
2231     }
2232 
2233     flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2234     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2235     if (ret < 0) {
2236         error_report("Invalid cache option: %s", cache);
2237         goto out;
2238     }
2239 
2240     /* XXX we should allow --image-opts to trigger use of
2241      * img_open() here, but then we have trouble with
2242      * the bdrv_create() call which takes different params.
2243      * Not critical right now, so fix can wait...
2244      */
2245     out_blk = img_open_file(out_filename, out_fmt, flags, writethrough, quiet);
2246     if (!out_blk) {
2247         ret = -1;
2248         goto out;
2249     }
2250     out_bs = blk_bs(out_blk);
2251 
2252     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2253      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
2254      * as maximum. */
2255     bufsectors = MIN(32768,
2256                      MAX(bufsectors,
2257                          MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2258                              out_bs->bl.pdiscard_alignment >>
2259                              BDRV_SECTOR_BITS)));
2260 
2261     if (skip_create) {
2262         int64_t output_sectors = blk_nb_sectors(out_blk);
2263         if (output_sectors < 0) {
2264             error_report("unable to get output image length: %s",
2265                          strerror(-output_sectors));
2266             ret = -1;
2267             goto out;
2268         } else if (output_sectors < total_sectors) {
2269             error_report("output file is smaller than input file");
2270             ret = -1;
2271             goto out;
2272         }
2273     }
2274 
2275     cluster_sectors = 0;
2276     ret = bdrv_get_info(out_bs, &bdi);
2277     if (ret < 0) {
2278         if (compress) {
2279             error_report("could not get block driver info");
2280             goto out;
2281         }
2282     } else {
2283         compress = compress || bdi.needs_compressed_writes;
2284         cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2285     }
2286 
2287     state = (ImgConvertState) {
2288         .src                = blk,
2289         .src_sectors        = bs_sectors,
2290         .src_num            = bs_n,
2291         .total_sectors      = total_sectors,
2292         .target             = out_blk,
2293         .compressed         = compress,
2294         .target_has_backing = (bool) out_baseimg,
2295         .min_sparse         = min_sparse,
2296         .cluster_sectors    = cluster_sectors,
2297         .buf_sectors        = bufsectors,
2298         .wr_in_order        = wr_in_order,
2299         .num_coroutines     = num_coroutines,
2300     };
2301     ret = convert_do_copy(&state);
2302 
2303 out:
2304     if (!ret) {
2305         qemu_progress_print(100, 0);
2306     }
2307     qemu_progress_end();
2308     qemu_opts_del(opts);
2309     qemu_opts_free(create_opts);
2310     qemu_opts_del(sn_opts);
2311     blk_unref(out_blk);
2312     g_free(bs);
2313     if (blk) {
2314         for (bs_i = 0; bs_i < bs_n; bs_i++) {
2315             blk_unref(blk[bs_i]);
2316         }
2317         g_free(blk);
2318     }
2319     g_free(bs_sectors);
2320 fail_getopt:
2321     g_free(options);
2322 
2323     if (ret) {
2324         return 1;
2325     }
2326     return 0;
2327 }
2328 
2329 
2330 static void dump_snapshots(BlockDriverState *bs)
2331 {
2332     QEMUSnapshotInfo *sn_tab, *sn;
2333     int nb_sns, i;
2334 
2335     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2336     if (nb_sns <= 0)
2337         return;
2338     printf("Snapshot list:\n");
2339     bdrv_snapshot_dump(fprintf, stdout, NULL);
2340     printf("\n");
2341     for(i = 0; i < nb_sns; i++) {
2342         sn = &sn_tab[i];
2343         bdrv_snapshot_dump(fprintf, stdout, sn);
2344         printf("\n");
2345     }
2346     g_free(sn_tab);
2347 }
2348 
2349 static void dump_json_image_info_list(ImageInfoList *list)
2350 {
2351     QString *str;
2352     QObject *obj;
2353     Visitor *v = qobject_output_visitor_new(&obj);
2354 
2355     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2356     visit_complete(v, &obj);
2357     str = qobject_to_json_pretty(obj);
2358     assert(str != NULL);
2359     printf("%s\n", qstring_get_str(str));
2360     qobject_decref(obj);
2361     visit_free(v);
2362     QDECREF(str);
2363 }
2364 
2365 static void dump_json_image_info(ImageInfo *info)
2366 {
2367     QString *str;
2368     QObject *obj;
2369     Visitor *v = qobject_output_visitor_new(&obj);
2370 
2371     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2372     visit_complete(v, &obj);
2373     str = qobject_to_json_pretty(obj);
2374     assert(str != NULL);
2375     printf("%s\n", qstring_get_str(str));
2376     qobject_decref(obj);
2377     visit_free(v);
2378     QDECREF(str);
2379 }
2380 
2381 static void dump_human_image_info_list(ImageInfoList *list)
2382 {
2383     ImageInfoList *elem;
2384     bool delim = false;
2385 
2386     for (elem = list; elem; elem = elem->next) {
2387         if (delim) {
2388             printf("\n");
2389         }
2390         delim = true;
2391 
2392         bdrv_image_info_dump(fprintf, stdout, elem->value);
2393     }
2394 }
2395 
2396 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2397 {
2398     return strcmp(a, b) == 0;
2399 }
2400 
2401 /**
2402  * Open an image file chain and return an ImageInfoList
2403  *
2404  * @filename: topmost image filename
2405  * @fmt: topmost image format (may be NULL to autodetect)
2406  * @chain: true  - enumerate entire backing file chain
2407  *         false - only topmost image file
2408  *
2409  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2410  * image file.  If there was an error a message will have been printed to
2411  * stderr.
2412  */
2413 static ImageInfoList *collect_image_info_list(bool image_opts,
2414                                               const char *filename,
2415                                               const char *fmt,
2416                                               bool chain)
2417 {
2418     ImageInfoList *head = NULL;
2419     ImageInfoList **last = &head;
2420     GHashTable *filenames;
2421     Error *err = NULL;
2422 
2423     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2424 
2425     while (filename) {
2426         BlockBackend *blk;
2427         BlockDriverState *bs;
2428         ImageInfo *info;
2429         ImageInfoList *elem;
2430 
2431         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2432             error_report("Backing file '%s' creates an infinite loop.",
2433                          filename);
2434             goto err;
2435         }
2436         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2437 
2438         blk = img_open(image_opts, filename, fmt,
2439                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false);
2440         if (!blk) {
2441             goto err;
2442         }
2443         bs = blk_bs(blk);
2444 
2445         bdrv_query_image_info(bs, &info, &err);
2446         if (err) {
2447             error_report_err(err);
2448             blk_unref(blk);
2449             goto err;
2450         }
2451 
2452         elem = g_new0(ImageInfoList, 1);
2453         elem->value = info;
2454         *last = elem;
2455         last = &elem->next;
2456 
2457         blk_unref(blk);
2458 
2459         filename = fmt = NULL;
2460         if (chain) {
2461             if (info->has_full_backing_filename) {
2462                 filename = info->full_backing_filename;
2463             } else if (info->has_backing_filename) {
2464                 error_report("Could not determine absolute backing filename,"
2465                              " but backing filename '%s' present",
2466                              info->backing_filename);
2467                 goto err;
2468             }
2469             if (info->has_backing_filename_format) {
2470                 fmt = info->backing_filename_format;
2471             }
2472         }
2473     }
2474     g_hash_table_destroy(filenames);
2475     return head;
2476 
2477 err:
2478     qapi_free_ImageInfoList(head);
2479     g_hash_table_destroy(filenames);
2480     return NULL;
2481 }
2482 
2483 static int img_info(int argc, char **argv)
2484 {
2485     int c;
2486     OutputFormat output_format = OFORMAT_HUMAN;
2487     bool chain = false;
2488     const char *filename, *fmt, *output;
2489     ImageInfoList *list;
2490     bool image_opts = false;
2491 
2492     fmt = NULL;
2493     output = NULL;
2494     for(;;) {
2495         int option_index = 0;
2496         static const struct option long_options[] = {
2497             {"help", no_argument, 0, 'h'},
2498             {"format", required_argument, 0, 'f'},
2499             {"output", required_argument, 0, OPTION_OUTPUT},
2500             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2501             {"object", required_argument, 0, OPTION_OBJECT},
2502             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2503             {0, 0, 0, 0}
2504         };
2505         c = getopt_long(argc, argv, "f:h",
2506                         long_options, &option_index);
2507         if (c == -1) {
2508             break;
2509         }
2510         switch(c) {
2511         case '?':
2512         case 'h':
2513             help();
2514             break;
2515         case 'f':
2516             fmt = optarg;
2517             break;
2518         case OPTION_OUTPUT:
2519             output = optarg;
2520             break;
2521         case OPTION_BACKING_CHAIN:
2522             chain = true;
2523             break;
2524         case OPTION_OBJECT: {
2525             QemuOpts *opts;
2526             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2527                                            optarg, true);
2528             if (!opts) {
2529                 return 1;
2530             }
2531         }   break;
2532         case OPTION_IMAGE_OPTS:
2533             image_opts = true;
2534             break;
2535         }
2536     }
2537     if (optind != argc - 1) {
2538         error_exit("Expecting one image file name");
2539     }
2540     filename = argv[optind++];
2541 
2542     if (output && !strcmp(output, "json")) {
2543         output_format = OFORMAT_JSON;
2544     } else if (output && !strcmp(output, "human")) {
2545         output_format = OFORMAT_HUMAN;
2546     } else if (output) {
2547         error_report("--output must be used with human or json as argument.");
2548         return 1;
2549     }
2550 
2551     if (qemu_opts_foreach(&qemu_object_opts,
2552                           user_creatable_add_opts_foreach,
2553                           NULL, NULL)) {
2554         return 1;
2555     }
2556 
2557     list = collect_image_info_list(image_opts, filename, fmt, chain);
2558     if (!list) {
2559         return 1;
2560     }
2561 
2562     switch (output_format) {
2563     case OFORMAT_HUMAN:
2564         dump_human_image_info_list(list);
2565         break;
2566     case OFORMAT_JSON:
2567         if (chain) {
2568             dump_json_image_info_list(list);
2569         } else {
2570             dump_json_image_info(list->value);
2571         }
2572         break;
2573     }
2574 
2575     qapi_free_ImageInfoList(list);
2576     return 0;
2577 }
2578 
2579 static void dump_map_entry(OutputFormat output_format, MapEntry *e,
2580                            MapEntry *next)
2581 {
2582     switch (output_format) {
2583     case OFORMAT_HUMAN:
2584         if (e->data && !e->has_offset) {
2585             error_report("File contains external, encrypted or compressed clusters.");
2586             exit(1);
2587         }
2588         if (e->data && !e->zero) {
2589             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2590                    e->start, e->length,
2591                    e->has_offset ? e->offset : 0,
2592                    e->has_filename ? e->filename : "");
2593         }
2594         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2595          * Modify the flags here to allow more coalescing.
2596          */
2597         if (next && (!next->data || next->zero)) {
2598             next->data = false;
2599             next->zero = true;
2600         }
2601         break;
2602     case OFORMAT_JSON:
2603         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2604                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2605                (e->start == 0 ? "[" : ",\n"),
2606                e->start, e->length, e->depth,
2607                e->zero ? "true" : "false",
2608                e->data ? "true" : "false");
2609         if (e->has_offset) {
2610             printf(", \"offset\": %"PRId64"", e->offset);
2611         }
2612         putchar('}');
2613 
2614         if (!next) {
2615             printf("]\n");
2616         }
2617         break;
2618     }
2619 }
2620 
2621 static int get_block_status(BlockDriverState *bs, int64_t sector_num,
2622                             int nb_sectors, MapEntry *e)
2623 {
2624     int64_t ret;
2625     int depth;
2626     BlockDriverState *file;
2627     bool has_offset;
2628 
2629     /* As an optimization, we could cache the current range of unallocated
2630      * clusters in each file of the chain, and avoid querying the same
2631      * range repeatedly.
2632      */
2633 
2634     depth = 0;
2635     for (;;) {
2636         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors,
2637                                     &file);
2638         if (ret < 0) {
2639             return ret;
2640         }
2641         assert(nb_sectors);
2642         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2643             break;
2644         }
2645         bs = backing_bs(bs);
2646         if (bs == NULL) {
2647             ret = 0;
2648             break;
2649         }
2650 
2651         depth++;
2652     }
2653 
2654     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2655 
2656     *e = (MapEntry) {
2657         .start = sector_num * BDRV_SECTOR_SIZE,
2658         .length = nb_sectors * BDRV_SECTOR_SIZE,
2659         .data = !!(ret & BDRV_BLOCK_DATA),
2660         .zero = !!(ret & BDRV_BLOCK_ZERO),
2661         .offset = ret & BDRV_BLOCK_OFFSET_MASK,
2662         .has_offset = has_offset,
2663         .depth = depth,
2664         .has_filename = file && has_offset,
2665         .filename = file && has_offset ? file->filename : NULL,
2666     };
2667 
2668     return 0;
2669 }
2670 
2671 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2672 {
2673     if (curr->length == 0) {
2674         return false;
2675     }
2676     if (curr->zero != next->zero ||
2677         curr->data != next->data ||
2678         curr->depth != next->depth ||
2679         curr->has_filename != next->has_filename ||
2680         curr->has_offset != next->has_offset) {
2681         return false;
2682     }
2683     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2684         return false;
2685     }
2686     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2687         return false;
2688     }
2689     return true;
2690 }
2691 
2692 static int img_map(int argc, char **argv)
2693 {
2694     int c;
2695     OutputFormat output_format = OFORMAT_HUMAN;
2696     BlockBackend *blk;
2697     BlockDriverState *bs;
2698     const char *filename, *fmt, *output;
2699     int64_t length;
2700     MapEntry curr = { .length = 0 }, next;
2701     int ret = 0;
2702     bool image_opts = false;
2703 
2704     fmt = NULL;
2705     output = NULL;
2706     for (;;) {
2707         int option_index = 0;
2708         static const struct option long_options[] = {
2709             {"help", no_argument, 0, 'h'},
2710             {"format", required_argument, 0, 'f'},
2711             {"output", required_argument, 0, OPTION_OUTPUT},
2712             {"object", required_argument, 0, OPTION_OBJECT},
2713             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2714             {0, 0, 0, 0}
2715         };
2716         c = getopt_long(argc, argv, "f:h",
2717                         long_options, &option_index);
2718         if (c == -1) {
2719             break;
2720         }
2721         switch (c) {
2722         case '?':
2723         case 'h':
2724             help();
2725             break;
2726         case 'f':
2727             fmt = optarg;
2728             break;
2729         case OPTION_OUTPUT:
2730             output = optarg;
2731             break;
2732         case OPTION_OBJECT: {
2733             QemuOpts *opts;
2734             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2735                                            optarg, true);
2736             if (!opts) {
2737                 return 1;
2738             }
2739         }   break;
2740         case OPTION_IMAGE_OPTS:
2741             image_opts = true;
2742             break;
2743         }
2744     }
2745     if (optind != argc - 1) {
2746         error_exit("Expecting one image file name");
2747     }
2748     filename = argv[optind];
2749 
2750     if (output && !strcmp(output, "json")) {
2751         output_format = OFORMAT_JSON;
2752     } else if (output && !strcmp(output, "human")) {
2753         output_format = OFORMAT_HUMAN;
2754     } else if (output) {
2755         error_report("--output must be used with human or json as argument.");
2756         return 1;
2757     }
2758 
2759     if (qemu_opts_foreach(&qemu_object_opts,
2760                           user_creatable_add_opts_foreach,
2761                           NULL, NULL)) {
2762         return 1;
2763     }
2764 
2765     blk = img_open(image_opts, filename, fmt, 0, false, false);
2766     if (!blk) {
2767         return 1;
2768     }
2769     bs = blk_bs(blk);
2770 
2771     if (output_format == OFORMAT_HUMAN) {
2772         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
2773     }
2774 
2775     length = blk_getlength(blk);
2776     while (curr.start + curr.length < length) {
2777         int64_t nsectors_left;
2778         int64_t sector_num;
2779         int n;
2780 
2781         sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
2782 
2783         /* Probe up to 1 GiB at a time.  */
2784         nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
2785         n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
2786         ret = get_block_status(bs, sector_num, n, &next);
2787 
2788         if (ret < 0) {
2789             error_report("Could not read file metadata: %s", strerror(-ret));
2790             goto out;
2791         }
2792 
2793         if (entry_mergeable(&curr, &next)) {
2794             curr.length += next.length;
2795             continue;
2796         }
2797 
2798         if (curr.length > 0) {
2799             dump_map_entry(output_format, &curr, &next);
2800         }
2801         curr = next;
2802     }
2803 
2804     dump_map_entry(output_format, &curr, NULL);
2805 
2806 out:
2807     blk_unref(blk);
2808     return ret < 0;
2809 }
2810 
/*
 * Action codes for img_snapshot(), selected by its -l, -c, -a and -d options
 * respectively.  The value 0 is left free to mean "no action chosen yet".
 */
#define SNAPSHOT_LIST   1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY  3
#define SNAPSHOT_DELETE 4
2815 
2816 static int img_snapshot(int argc, char **argv)
2817 {
2818     BlockBackend *blk;
2819     BlockDriverState *bs;
2820     QEMUSnapshotInfo sn;
2821     char *filename, *snapshot_name = NULL;
2822     int c, ret = 0, bdrv_oflags;
2823     int action = 0;
2824     qemu_timeval tv;
2825     bool quiet = false;
2826     Error *err = NULL;
2827     bool image_opts = false;
2828 
2829     bdrv_oflags = BDRV_O_RDWR;
2830     /* Parse commandline parameters */
2831     for(;;) {
2832         static const struct option long_options[] = {
2833             {"help", no_argument, 0, 'h'},
2834             {"object", required_argument, 0, OPTION_OBJECT},
2835             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2836             {0, 0, 0, 0}
2837         };
2838         c = getopt_long(argc, argv, "la:c:d:hq",
2839                         long_options, NULL);
2840         if (c == -1) {
2841             break;
2842         }
2843         switch(c) {
2844         case '?':
2845         case 'h':
2846             help();
2847             return 0;
2848         case 'l':
2849             if (action) {
2850                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2851                 return 0;
2852             }
2853             action = SNAPSHOT_LIST;
2854             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
2855             break;
2856         case 'a':
2857             if (action) {
2858                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2859                 return 0;
2860             }
2861             action = SNAPSHOT_APPLY;
2862             snapshot_name = optarg;
2863             break;
2864         case 'c':
2865             if (action) {
2866                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2867                 return 0;
2868             }
2869             action = SNAPSHOT_CREATE;
2870             snapshot_name = optarg;
2871             break;
2872         case 'd':
2873             if (action) {
2874                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2875                 return 0;
2876             }
2877             action = SNAPSHOT_DELETE;
2878             snapshot_name = optarg;
2879             break;
2880         case 'q':
2881             quiet = true;
2882             break;
2883         case OPTION_OBJECT: {
2884             QemuOpts *opts;
2885             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2886                                            optarg, true);
2887             if (!opts) {
2888                 return 1;
2889             }
2890         }   break;
2891         case OPTION_IMAGE_OPTS:
2892             image_opts = true;
2893             break;
2894         }
2895     }
2896 
2897     if (optind != argc - 1) {
2898         error_exit("Expecting one image file name");
2899     }
2900     filename = argv[optind++];
2901 
2902     if (qemu_opts_foreach(&qemu_object_opts,
2903                           user_creatable_add_opts_foreach,
2904                           NULL, NULL)) {
2905         return 1;
2906     }
2907 
2908     /* Open the image */
2909     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet);
2910     if (!blk) {
2911         return 1;
2912     }
2913     bs = blk_bs(blk);
2914 
2915     /* Perform the requested action */
2916     switch(action) {
2917     case SNAPSHOT_LIST:
2918         dump_snapshots(bs);
2919         break;
2920 
2921     case SNAPSHOT_CREATE:
2922         memset(&sn, 0, sizeof(sn));
2923         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
2924 
2925         qemu_gettimeofday(&tv);
2926         sn.date_sec = tv.tv_sec;
2927         sn.date_nsec = tv.tv_usec * 1000;
2928 
2929         ret = bdrv_snapshot_create(bs, &sn);
2930         if (ret) {
2931             error_report("Could not create snapshot '%s': %d (%s)",
2932                 snapshot_name, ret, strerror(-ret));
2933         }
2934         break;
2935 
2936     case SNAPSHOT_APPLY:
2937         ret = bdrv_snapshot_goto(bs, snapshot_name);
2938         if (ret) {
2939             error_report("Could not apply snapshot '%s': %d (%s)",
2940                 snapshot_name, ret, strerror(-ret));
2941         }
2942         break;
2943 
2944     case SNAPSHOT_DELETE:
2945         bdrv_snapshot_delete_by_id_or_name(bs, snapshot_name, &err);
2946         if (err) {
2947             error_reportf_err(err, "Could not delete snapshot '%s': ",
2948                               snapshot_name);
2949             ret = 1;
2950         }
2951         break;
2952     }
2953 
2954     /* Cleanup */
2955     blk_unref(blk);
2956     if (ret) {
2957         return 1;
2958     }
2959     return 0;
2960 }
2961 
2962 static int img_rebase(int argc, char **argv)
2963 {
2964     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
2965     uint8_t *buf_old = NULL;
2966     uint8_t *buf_new = NULL;
2967     BlockDriverState *bs = NULL;
2968     char *filename;
2969     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
2970     int c, flags, src_flags, ret;
2971     bool writethrough, src_writethrough;
2972     int unsafe = 0;
2973     int progress = 0;
2974     bool quiet = false;
2975     Error *local_err = NULL;
2976     bool image_opts = false;
2977 
2978     /* Parse commandline parameters */
2979     fmt = NULL;
2980     cache = BDRV_DEFAULT_CACHE;
2981     src_cache = BDRV_DEFAULT_CACHE;
2982     out_baseimg = NULL;
2983     out_basefmt = NULL;
2984     for(;;) {
2985         static const struct option long_options[] = {
2986             {"help", no_argument, 0, 'h'},
2987             {"object", required_argument, 0, OPTION_OBJECT},
2988             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2989             {0, 0, 0, 0}
2990         };
2991         c = getopt_long(argc, argv, "hf:F:b:upt:T:q",
2992                         long_options, NULL);
2993         if (c == -1) {
2994             break;
2995         }
2996         switch(c) {
2997         case '?':
2998         case 'h':
2999             help();
3000             return 0;
3001         case 'f':
3002             fmt = optarg;
3003             break;
3004         case 'F':
3005             out_basefmt = optarg;
3006             break;
3007         case 'b':
3008             out_baseimg = optarg;
3009             break;
3010         case 'u':
3011             unsafe = 1;
3012             break;
3013         case 'p':
3014             progress = 1;
3015             break;
3016         case 't':
3017             cache = optarg;
3018             break;
3019         case 'T':
3020             src_cache = optarg;
3021             break;
3022         case 'q':
3023             quiet = true;
3024             break;
3025         case OPTION_OBJECT: {
3026             QemuOpts *opts;
3027             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3028                                            optarg, true);
3029             if (!opts) {
3030                 return 1;
3031             }
3032         }   break;
3033         case OPTION_IMAGE_OPTS:
3034             image_opts = true;
3035             break;
3036         }
3037     }
3038 
3039     if (quiet) {
3040         progress = 0;
3041     }
3042 
3043     if (optind != argc - 1) {
3044         error_exit("Expecting one image file name");
3045     }
3046     if (!unsafe && !out_baseimg) {
3047         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3048     }
3049     filename = argv[optind++];
3050 
3051     if (qemu_opts_foreach(&qemu_object_opts,
3052                           user_creatable_add_opts_foreach,
3053                           NULL, NULL)) {
3054         return 1;
3055     }
3056 
3057     qemu_progress_init(progress, 2.0);
3058     qemu_progress_print(0, 100);
3059 
3060     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3061     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3062     if (ret < 0) {
3063         error_report("Invalid cache option: %s", cache);
3064         goto out;
3065     }
3066 
3067     src_flags = 0;
3068     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3069     if (ret < 0) {
3070         error_report("Invalid source cache option: %s", src_cache);
3071         goto out;
3072     }
3073 
3074     /* The source files are opened read-only, don't care about WCE */
3075     assert((src_flags & BDRV_O_RDWR) == 0);
3076     (void) src_writethrough;
3077 
3078     /*
3079      * Open the images.
3080      *
3081      * Ignore the old backing file for unsafe rebase in case we want to correct
3082      * the reference to a renamed or moved backing file.
3083      */
3084     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3085     if (!blk) {
3086         ret = -1;
3087         goto out;
3088     }
3089     bs = blk_bs(blk);
3090 
3091     if (out_basefmt != NULL) {
3092         if (bdrv_find_format(out_basefmt) == NULL) {
3093             error_report("Invalid format name: '%s'", out_basefmt);
3094             ret = -1;
3095             goto out;
3096         }
3097     }
3098 
3099     /* For safe rebasing we need to compare old and new backing file */
3100     if (!unsafe) {
3101         char backing_name[PATH_MAX];
3102         QDict *options = NULL;
3103 
3104         if (bs->backing_format[0] != '\0') {
3105             options = qdict_new();
3106             qdict_put(options, "driver", qstring_from_str(bs->backing_format));
3107         }
3108 
3109         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
3110         blk_old_backing = blk_new_open(backing_name, NULL,
3111                                        options, src_flags, &local_err);
3112         if (!blk_old_backing) {
3113             error_reportf_err(local_err,
3114                               "Could not open old backing file '%s': ",
3115                               backing_name);
3116             ret = -1;
3117             goto out;
3118         }
3119 
3120         if (out_baseimg[0]) {
3121             if (out_basefmt) {
3122                 options = qdict_new();
3123                 qdict_put(options, "driver", qstring_from_str(out_basefmt));
3124             } else {
3125                 options = NULL;
3126             }
3127 
3128             blk_new_backing = blk_new_open(out_baseimg, NULL,
3129                                            options, src_flags, &local_err);
3130             if (!blk_new_backing) {
3131                 error_reportf_err(local_err,
3132                                   "Could not open new backing file '%s': ",
3133                                   out_baseimg);
3134                 ret = -1;
3135                 goto out;
3136             }
3137         }
3138     }
3139 
3140     /*
3141      * Check each unallocated cluster in the COW file. If it is unallocated,
3142      * accesses go to the backing file. We must therefore compare this cluster
3143      * in the old and new backing file, and if they differ we need to copy it
3144      * from the old backing file into the COW file.
3145      *
3146      * If qemu-img crashes during this step, no harm is done. The content of
3147      * the image is the same as the original one at any time.
3148      */
3149     if (!unsafe) {
3150         int64_t num_sectors;
3151         int64_t old_backing_num_sectors;
3152         int64_t new_backing_num_sectors = 0;
3153         uint64_t sector;
3154         int n;
3155         float local_progress = 0;
3156 
3157         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3158         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3159 
3160         num_sectors = blk_nb_sectors(blk);
3161         if (num_sectors < 0) {
3162             error_report("Could not get size of '%s': %s",
3163                          filename, strerror(-num_sectors));
3164             ret = -1;
3165             goto out;
3166         }
3167         old_backing_num_sectors = blk_nb_sectors(blk_old_backing);
3168         if (old_backing_num_sectors < 0) {
3169             char backing_name[PATH_MAX];
3170 
3171             bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
3172             error_report("Could not get size of '%s': %s",
3173                          backing_name, strerror(-old_backing_num_sectors));
3174             ret = -1;
3175             goto out;
3176         }
3177         if (blk_new_backing) {
3178             new_backing_num_sectors = blk_nb_sectors(blk_new_backing);
3179             if (new_backing_num_sectors < 0) {
3180                 error_report("Could not get size of '%s': %s",
3181                              out_baseimg, strerror(-new_backing_num_sectors));
3182                 ret = -1;
3183                 goto out;
3184             }
3185         }
3186 
3187         if (num_sectors != 0) {
3188             local_progress = (float)100 /
3189                 (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
3190         }
3191 
3192         for (sector = 0; sector < num_sectors; sector += n) {
3193 
3194             /* How many sectors can we handle with the next read? */
3195             if (sector + (IO_BUF_SIZE / 512) <= num_sectors) {
3196                 n = (IO_BUF_SIZE / 512);
3197             } else {
3198                 n = num_sectors - sector;
3199             }
3200 
3201             /* If the cluster is allocated, we don't need to take action */
3202             ret = bdrv_is_allocated(bs, sector, n, &n);
3203             if (ret < 0) {
3204                 error_report("error while reading image metadata: %s",
3205                              strerror(-ret));
3206                 goto out;
3207             }
3208             if (ret) {
3209                 continue;
3210             }
3211 
3212             /*
3213              * Read old and new backing file and take into consideration that
3214              * backing files may be smaller than the COW image.
3215              */
3216             if (sector >= old_backing_num_sectors) {
3217                 memset(buf_old, 0, n * BDRV_SECTOR_SIZE);
3218             } else {
3219                 if (sector + n > old_backing_num_sectors) {
3220                     n = old_backing_num_sectors - sector;
3221                 }
3222 
3223                 ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS,
3224                                 buf_old, n << BDRV_SECTOR_BITS);
3225                 if (ret < 0) {
3226                     error_report("error while reading from old backing file");
3227                     goto out;
3228                 }
3229             }
3230 
3231             if (sector >= new_backing_num_sectors || !blk_new_backing) {
3232                 memset(buf_new, 0, n * BDRV_SECTOR_SIZE);
3233             } else {
3234                 if (sector + n > new_backing_num_sectors) {
3235                     n = new_backing_num_sectors - sector;
3236                 }
3237 
3238                 ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS,
3239                                 buf_new, n << BDRV_SECTOR_BITS);
3240                 if (ret < 0) {
3241                     error_report("error while reading from new backing file");
3242                     goto out;
3243                 }
3244             }
3245 
3246             /* If they differ, we need to write to the COW file */
3247             uint64_t written = 0;
3248 
3249             while (written < n) {
3250                 int pnum;
3251 
3252                 if (compare_sectors(buf_old + written * 512,
3253                     buf_new + written * 512, n - written, &pnum))
3254                 {
3255                     ret = blk_pwrite(blk,
3256                                      (sector + written) << BDRV_SECTOR_BITS,
3257                                      buf_old + written * 512,
3258                                      pnum << BDRV_SECTOR_BITS, 0);
3259                     if (ret < 0) {
3260                         error_report("Error while writing to COW image: %s",
3261                             strerror(-ret));
3262                         goto out;
3263                     }
3264                 }
3265 
3266                 written += pnum;
3267             }
3268             qemu_progress_print(local_progress, 100);
3269         }
3270     }
3271 
3272     /*
3273      * Change the backing file. All clusters that are different from the old
3274      * backing file are overwritten in the COW file now, so the visible content
3275      * doesn't change when we switch the backing file.
3276      */
3277     if (out_baseimg && *out_baseimg) {
3278         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3279     } else {
3280         ret = bdrv_change_backing_file(bs, NULL, NULL);
3281     }
3282 
3283     if (ret == -ENOSPC) {
3284         error_report("Could not change the backing file to '%s': No "
3285                      "space left in the file header", out_baseimg);
3286     } else if (ret < 0) {
3287         error_report("Could not change the backing file to '%s': %s",
3288             out_baseimg, strerror(-ret));
3289     }
3290 
3291     qemu_progress_print(100, 0);
3292     /*
3293      * TODO At this point it is possible to check if any clusters that are
3294      * allocated in the COW file are the same in the backing file. If so, they
3295      * could be dropped from the COW file. Don't do this before switching the
3296      * backing file, in case of a crash this would lead to corruption.
3297      */
3298 out:
3299     qemu_progress_end();
3300     /* Cleanup */
3301     if (!unsafe) {
3302         blk_unref(blk_old_backing);
3303         blk_unref(blk_new_backing);
3304     }
3305     qemu_vfree(buf_old);
3306     qemu_vfree(buf_new);
3307 
3308     blk_unref(blk);
3309     if (ret) {
3310         return 1;
3311     }
3312     return 0;
3313 }
3314 
3315 static int img_resize(int argc, char **argv)
3316 {
3317     Error *err = NULL;
3318     int c, ret, relative;
3319     const char *filename, *fmt, *size;
3320     int64_t n, total_size;
3321     bool quiet = false;
3322     BlockBackend *blk = NULL;
3323     QemuOpts *param;
3324 
3325     static QemuOptsList resize_options = {
3326         .name = "resize_options",
3327         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3328         .desc = {
3329             {
3330                 .name = BLOCK_OPT_SIZE,
3331                 .type = QEMU_OPT_SIZE,
3332                 .help = "Virtual disk size"
3333             }, {
3334                 /* end of list */
3335             }
3336         },
3337     };
3338     bool image_opts = false;
3339 
3340     /* Remove size from argv manually so that negative numbers are not treated
3341      * as options by getopt. */
3342     if (argc < 3) {
3343         error_exit("Not enough arguments");
3344         return 1;
3345     }
3346 
3347     size = argv[--argc];
3348 
3349     /* Parse getopt arguments */
3350     fmt = NULL;
3351     for(;;) {
3352         static const struct option long_options[] = {
3353             {"help", no_argument, 0, 'h'},
3354             {"object", required_argument, 0, OPTION_OBJECT},
3355             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3356             {0, 0, 0, 0}
3357         };
3358         c = getopt_long(argc, argv, "f:hq",
3359                         long_options, NULL);
3360         if (c == -1) {
3361             break;
3362         }
3363         switch(c) {
3364         case '?':
3365         case 'h':
3366             help();
3367             break;
3368         case 'f':
3369             fmt = optarg;
3370             break;
3371         case 'q':
3372             quiet = true;
3373             break;
3374         case OPTION_OBJECT: {
3375             QemuOpts *opts;
3376             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3377                                            optarg, true);
3378             if (!opts) {
3379                 return 1;
3380             }
3381         }   break;
3382         case OPTION_IMAGE_OPTS:
3383             image_opts = true;
3384             break;
3385         }
3386     }
3387     if (optind != argc - 1) {
3388         error_exit("Expecting one image file name");
3389     }
3390     filename = argv[optind++];
3391 
3392     if (qemu_opts_foreach(&qemu_object_opts,
3393                           user_creatable_add_opts_foreach,
3394                           NULL, NULL)) {
3395         return 1;
3396     }
3397 
3398     /* Choose grow, shrink, or absolute resize mode */
3399     switch (size[0]) {
3400     case '+':
3401         relative = 1;
3402         size++;
3403         break;
3404     case '-':
3405         relative = -1;
3406         size++;
3407         break;
3408     default:
3409         relative = 0;
3410         break;
3411     }
3412 
3413     /* Parse size */
3414     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3415     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3416     if (err) {
3417         error_report_err(err);
3418         ret = -1;
3419         qemu_opts_del(param);
3420         goto out;
3421     }
3422     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3423     qemu_opts_del(param);
3424 
3425     blk = img_open(image_opts, filename, fmt,
3426                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet);
3427     if (!blk) {
3428         ret = -1;
3429         goto out;
3430     }
3431 
3432     if (relative) {
3433         total_size = blk_getlength(blk) + n * relative;
3434     } else {
3435         total_size = n;
3436     }
3437     if (total_size <= 0) {
3438         error_report("New image size must be positive");
3439         ret = -1;
3440         goto out;
3441     }
3442 
3443     ret = blk_truncate(blk, total_size);
3444     switch (ret) {
3445     case 0:
3446         qprintf(quiet, "Image resized.\n");
3447         break;
3448     case -ENOTSUP:
3449         error_report("This image does not support resize");
3450         break;
3451     case -EACCES:
3452         error_report("Image is read-only");
3453         break;
3454     default:
3455         error_report("Error resizing image: %s", strerror(-ret));
3456         break;
3457     }
3458 out:
3459     blk_unref(blk);
3460     if (ret) {
3461         return 1;
3462     }
3463     return 0;
3464 }
3465 
3466 static void amend_status_cb(BlockDriverState *bs,
3467                             int64_t offset, int64_t total_work_size,
3468                             void *opaque)
3469 {
3470     qemu_progress_print(100.f * offset / total_work_size, 0);
3471 }
3472 
3473 static int img_amend(int argc, char **argv)
3474 {
3475     Error *err = NULL;
3476     int c, ret = 0;
3477     char *options = NULL;
3478     QemuOptsList *create_opts = NULL;
3479     QemuOpts *opts = NULL;
3480     const char *fmt = NULL, *filename, *cache;
3481     int flags;
3482     bool writethrough;
3483     bool quiet = false, progress = false;
3484     BlockBackend *blk = NULL;
3485     BlockDriverState *bs = NULL;
3486     bool image_opts = false;
3487 
3488     cache = BDRV_DEFAULT_CACHE;
3489     for (;;) {
3490         static const struct option long_options[] = {
3491             {"help", no_argument, 0, 'h'},
3492             {"object", required_argument, 0, OPTION_OBJECT},
3493             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3494             {0, 0, 0, 0}
3495         };
3496         c = getopt_long(argc, argv, "ho:f:t:pq",
3497                         long_options, NULL);
3498         if (c == -1) {
3499             break;
3500         }
3501 
3502         switch (c) {
3503             case 'h':
3504             case '?':
3505                 help();
3506                 break;
3507             case 'o':
3508                 if (!is_valid_option_list(optarg)) {
3509                     error_report("Invalid option list: %s", optarg);
3510                     ret = -1;
3511                     goto out_no_progress;
3512                 }
3513                 if (!options) {
3514                     options = g_strdup(optarg);
3515                 } else {
3516                     char *old_options = options;
3517                     options = g_strdup_printf("%s,%s", options, optarg);
3518                     g_free(old_options);
3519                 }
3520                 break;
3521             case 'f':
3522                 fmt = optarg;
3523                 break;
3524             case 't':
3525                 cache = optarg;
3526                 break;
3527             case 'p':
3528                 progress = true;
3529                 break;
3530             case 'q':
3531                 quiet = true;
3532                 break;
3533             case OPTION_OBJECT:
3534                 opts = qemu_opts_parse_noisily(&qemu_object_opts,
3535                                                optarg, true);
3536                 if (!opts) {
3537                     ret = -1;
3538                     goto out_no_progress;
3539                 }
3540                 break;
3541             case OPTION_IMAGE_OPTS:
3542                 image_opts = true;
3543                 break;
3544         }
3545     }
3546 
3547     if (!options) {
3548         error_exit("Must specify options (-o)");
3549     }
3550 
3551     if (qemu_opts_foreach(&qemu_object_opts,
3552                           user_creatable_add_opts_foreach,
3553                           NULL, NULL)) {
3554         ret = -1;
3555         goto out_no_progress;
3556     }
3557 
3558     if (quiet) {
3559         progress = false;
3560     }
3561     qemu_progress_init(progress, 1.0);
3562 
3563     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
3564     if (fmt && has_help_option(options)) {
3565         /* If a format is explicitly specified (and possibly no filename is
3566          * given), print option help here */
3567         ret = print_block_option_help(filename, fmt);
3568         goto out;
3569     }
3570 
3571     if (optind != argc - 1) {
3572         error_report("Expecting one image file name");
3573         ret = -1;
3574         goto out;
3575     }
3576 
3577     flags = BDRV_O_RDWR;
3578     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3579     if (ret < 0) {
3580         error_report("Invalid cache option: %s", cache);
3581         goto out;
3582     }
3583 
3584     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3585     if (!blk) {
3586         ret = -1;
3587         goto out;
3588     }
3589     bs = blk_bs(blk);
3590 
3591     fmt = bs->drv->format_name;
3592 
3593     if (has_help_option(options)) {
3594         /* If the format was auto-detected, print option help here */
3595         ret = print_block_option_help(filename, fmt);
3596         goto out;
3597     }
3598 
3599     if (!bs->drv->create_opts) {
3600         error_report("Format driver '%s' does not support any options to amend",
3601                      fmt);
3602         ret = -1;
3603         goto out;
3604     }
3605 
3606     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
3607     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3608     qemu_opts_do_parse(opts, options, NULL, &err);
3609     if (err) {
3610         error_report_err(err);
3611         ret = -1;
3612         goto out;
3613     }
3614 
3615     /* In case the driver does not call amend_status_cb() */
3616     qemu_progress_print(0.f, 0);
3617     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL);
3618     qemu_progress_print(100.f, 0);
3619     if (ret < 0) {
3620         error_report("Error while amending options: %s", strerror(-ret));
3621         goto out;
3622     }
3623 
3624 out:
3625     qemu_progress_end();
3626 
3627 out_no_progress:
3628     blk_unref(blk);
3629     qemu_opts_del(opts);
3630     qemu_opts_free(create_opts);
3631     g_free(options);
3632 
3633     if (ret) {
3634         return 1;
3635     }
3636     return 0;
3637 }
3638 
/*
 * State shared between the benchmark driver and its AIO completion callback
 * bench_cb().
 */
typedef struct BenchData {
    BlockBackend *blk;      /* backend all requests and flushes are issued on */
    uint64_t image_size;    /* request offsets wrap around modulo this value */
    bool write;             /* true: issue writes; false: issue reads */
    int bufsize;            /* presumably the size of buf - TODO confirm */
    int step;               /* added to offset after each submission */
    int nrreq;              /* upper bound on in_flight */
    int n;                  /* requests not yet completed; decremented on each
                             * completion */
    int flush_interval;     /* 0 disables flushing; otherwise selects after
                             * which completions a flush is issued */
    bool drain_on_flush;    /* if set, a flush is only issued once no requests
                             * are in flight, and nothing new is submitted
                             * until it completes */
    uint8_t *buf;           /* presumably the data buffer behind qiov - TODO
                             * confirm */
    QEMUIOVector *qiov;     /* I/O vector passed to every read/write request */

    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a drained flush is outstanding */
    uint64_t offset;        /* offset of the next request to submit */
} BenchData;
3656 
/*
 * Completion callback for flushes issued without draining the queue: a
 * failed flush is reported and terminates the process, success needs no
 * further action.  @opaque is unused.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
3664 
/*
 * AIO completion callback driving the benchmark.
 *
 * @opaque is the BenchData state; @ret is the result of the completed
 * request.  A negative @ret is reported and terminates the process.
 *
 * The callback first accounts for whatever just completed (a drained flush
 * or a read/write request), issues a flush if one is due, and then submits
 * new requests until either nrreq requests are in flight or all remaining
 * requests have been submitted.
 */
static void bench_cb(void *opaque, int ret)
{
    BenchData *b = opaque;
    BlockAIOCB *acb;

    if (ret < 0) {
        error_report("Failed request: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (b->in_flush) {
        /* Just finished a flush with drained queue: Start next requests */
        assert(b->in_flight == 0);
        b->in_flush = false;
    } else if (b->in_flight > 0) {
        /* Computed before the counters below are decremented */
        int remaining = b->n - b->in_flight;

        b->n--;
        b->in_flight--;

        /* Time for flush? Drain queue if requested, then flush */
        if (b->flush_interval && remaining % b->flush_interval == 0) {
            if (!b->in_flight || !b->drain_on_flush) {
                BlockCompletionFunc *cb;

                if (b->drain_on_flush) {
                    /* The flush completion re-enters bench_cb() */
                    b->in_flush = true;
                    cb = bench_cb;
                } else {
                    cb = bench_undrained_flush_cb;
                }

                acb = blk_aio_flush(b->blk, cb, b);
                if (!acb) {
                    error_report("Failed to issue flush request");
                    exit(EXIT_FAILURE);
                }
            }
            /*
             * When draining, submit nothing new: either requests are still
             * in flight or the flush has just been issued.
             */
            if (b->drain_on_flush) {
                return;
            }
        }
    }

    /* Refill the queue up to nrreq, as long as unsubmitted requests remain */
    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
        int64_t offset = b->offset;
        /* blk_aio_* might look for completed I/Os and kick bench_cb
         * again, so make sure this operation is counted by in_flight
         * and b->offset is ready for the next submission.
         */
        b->in_flight++;
        b->offset += b->step;
        b->offset %= b->image_size;
        if (b->write) {
            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
        } else {
            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
        }
        if (!acb) {
            error_report("Failed to issue request");
            exit(EXIT_FAILURE);
        }
    }
}
3729 
3730 static int img_bench(int argc, char **argv)
3731 {
3732     int c, ret = 0;
3733     const char *fmt = NULL, *filename;
3734     bool quiet = false;
3735     bool image_opts = false;
3736     bool is_write = false;
3737     int count = 75000;
3738     int depth = 64;
3739     int64_t offset = 0;
3740     size_t bufsize = 4096;
3741     int pattern = 0;
3742     size_t step = 0;
3743     int flush_interval = 0;
3744     bool drain_on_flush = true;
3745     int64_t image_size;
3746     BlockBackend *blk = NULL;
3747     BenchData data = {};
3748     int flags = 0;
3749     bool writethrough = false;
3750     struct timeval t1, t2;
3751     int i;
3752 
3753     for (;;) {
3754         static const struct option long_options[] = {
3755             {"help", no_argument, 0, 'h'},
3756             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
3757             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3758             {"pattern", required_argument, 0, OPTION_PATTERN},
3759             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
3760             {0, 0, 0, 0}
3761         };
3762         c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL);
3763         if (c == -1) {
3764             break;
3765         }
3766 
3767         switch (c) {
3768         case 'h':
3769         case '?':
3770             help();
3771             break;
3772         case 'c':
3773         {
3774             unsigned long res;
3775 
3776             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3777                 error_report("Invalid request count specified");
3778                 return 1;
3779             }
3780             count = res;
3781             break;
3782         }
3783         case 'd':
3784         {
3785             unsigned long res;
3786 
3787             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3788                 error_report("Invalid queue depth specified");
3789                 return 1;
3790             }
3791             depth = res;
3792             break;
3793         }
3794         case 'f':
3795             fmt = optarg;
3796             break;
3797         case 'n':
3798             flags |= BDRV_O_NATIVE_AIO;
3799             break;
3800         case 'o':
3801         {
3802             offset = cvtnum(optarg);
3803             if (offset < 0) {
3804                 error_report("Invalid offset specified");
3805                 return 1;
3806             }
3807             break;
3808         }
3809             break;
3810         case 'q':
3811             quiet = true;
3812             break;
3813         case 's':
3814         {
3815             int64_t sval;
3816 
3817             sval = cvtnum(optarg);
3818             if (sval < 0 || sval > INT_MAX) {
3819                 error_report("Invalid buffer size specified");
3820                 return 1;
3821             }
3822 
3823             bufsize = sval;
3824             break;
3825         }
3826         case 'S':
3827         {
3828             int64_t sval;
3829 
3830             sval = cvtnum(optarg);
3831             if (sval < 0 || sval > INT_MAX) {
3832                 error_report("Invalid step size specified");
3833                 return 1;
3834             }
3835 
3836             step = sval;
3837             break;
3838         }
3839         case 't':
3840             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
3841             if (ret < 0) {
3842                 error_report("Invalid cache mode");
3843                 ret = -1;
3844                 goto out;
3845             }
3846             break;
3847         case 'w':
3848             flags |= BDRV_O_RDWR;
3849             is_write = true;
3850             break;
3851         case OPTION_PATTERN:
3852         {
3853             unsigned long res;
3854 
3855             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
3856                 error_report("Invalid pattern byte specified");
3857                 return 1;
3858             }
3859             pattern = res;
3860             break;
3861         }
3862         case OPTION_FLUSH_INTERVAL:
3863         {
3864             unsigned long res;
3865 
3866             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3867                 error_report("Invalid flush interval specified");
3868                 return 1;
3869             }
3870             flush_interval = res;
3871             break;
3872         }
3873         case OPTION_NO_DRAIN:
3874             drain_on_flush = false;
3875             break;
3876         case OPTION_IMAGE_OPTS:
3877             image_opts = true;
3878             break;
3879         }
3880     }
3881 
3882     if (optind != argc - 1) {
3883         error_exit("Expecting one image file name");
3884     }
3885     filename = argv[argc - 1];
3886 
3887     if (!is_write && flush_interval) {
3888         error_report("--flush-interval is only available in write tests");
3889         ret = -1;
3890         goto out;
3891     }
3892     if (flush_interval && flush_interval < depth) {
3893         error_report("Flush interval can't be smaller than depth");
3894         ret = -1;
3895         goto out;
3896     }
3897 
3898     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3899     if (!blk) {
3900         ret = -1;
3901         goto out;
3902     }
3903 
3904     image_size = blk_getlength(blk);
3905     if (image_size < 0) {
3906         ret = image_size;
3907         goto out;
3908     }
3909 
3910     data = (BenchData) {
3911         .blk            = blk,
3912         .image_size     = image_size,
3913         .bufsize        = bufsize,
3914         .step           = step ?: bufsize,
3915         .nrreq          = depth,
3916         .n              = count,
3917         .offset         = offset,
3918         .write          = is_write,
3919         .flush_interval = flush_interval,
3920         .drain_on_flush = drain_on_flush,
3921     };
3922     printf("Sending %d %s requests, %d bytes each, %d in parallel "
3923            "(starting at offset %" PRId64 ", step size %d)\n",
3924            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
3925            data.offset, data.step);
3926     if (flush_interval) {
3927         printf("Sending flush every %d requests\n", flush_interval);
3928     }
3929 
3930     data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
3931     memset(data.buf, pattern, data.nrreq * data.bufsize);
3932 
3933     data.qiov = g_new(QEMUIOVector, data.nrreq);
3934     for (i = 0; i < data.nrreq; i++) {
3935         qemu_iovec_init(&data.qiov[i], 1);
3936         qemu_iovec_add(&data.qiov[i],
3937                        data.buf + i * data.bufsize, data.bufsize);
3938     }
3939 
3940     gettimeofday(&t1, NULL);
3941     bench_cb(&data, 0);
3942 
3943     while (data.n > 0) {
3944         main_loop_wait(false);
3945     }
3946     gettimeofday(&t2, NULL);
3947 
3948     printf("Run completed in %3.3f seconds.\n",
3949            (t2.tv_sec - t1.tv_sec)
3950            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
3951 
3952 out:
3953     qemu_vfree(data.buf);
3954     blk_unref(blk);
3955 
3956     if (ret) {
3957         return 1;
3958     }
3959     return 0;
3960 }
3961 
/* Bits for DdInfo.flags: one per "qemu-img dd" operand seen on the
 * command line. */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
3967 
/* Operand state of a "qemu-img dd" invocation that is not tied to either
 * the input or the output file. */
struct DdInfo {
    /* Bitmask of C_* values, one bit per operand that was parsed */
    unsigned int flags;
    /* Value of the count= operand */
    int64_t count;
};
3972 
/* Description of one side (input or output) of a "qemu-img dd" copy. */
struct DdIo {
    /* Block size */
    int bsz;
    /* Heap-allocated copy of the if=/of= operand, NULL if not given */
    char *filename;
    /* Transfer buffer, NULL until allocated */
    uint8_t *buf;
    /* Value of the skip= operand, counted in blocks of bsz bytes */
    int64_t offset;
};
3979 
/* One entry of the "qemu-img dd" operand table. */
struct DdOpts {
    /* Operand name as written in front of the '=' */
    const char *name;
    /* Parser for the operand value; returns 0 on success */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit recorded in DdInfo.flags once the operand was parsed */
    unsigned int flag;
};
3985 
3986 static int img_dd_bs(const char *arg,
3987                      struct DdIo *in, struct DdIo *out,
3988                      struct DdInfo *dd)
3989 {
3990     int64_t res;
3991 
3992     res = cvtnum(arg);
3993 
3994     if (res <= 0 || res > INT_MAX) {
3995         error_report("invalid number: '%s'", arg);
3996         return 1;
3997     }
3998     in->bsz = out->bsz = res;
3999 
4000     return 0;
4001 }
4002 
4003 static int img_dd_count(const char *arg,
4004                         struct DdIo *in, struct DdIo *out,
4005                         struct DdInfo *dd)
4006 {
4007     dd->count = cvtnum(arg);
4008 
4009     if (dd->count < 0) {
4010         error_report("invalid number: '%s'", arg);
4011         return 1;
4012     }
4013 
4014     return 0;
4015 }
4016 
4017 static int img_dd_if(const char *arg,
4018                      struct DdIo *in, struct DdIo *out,
4019                      struct DdInfo *dd)
4020 {
4021     in->filename = g_strdup(arg);
4022 
4023     return 0;
4024 }
4025 
4026 static int img_dd_of(const char *arg,
4027                      struct DdIo *in, struct DdIo *out,
4028                      struct DdInfo *dd)
4029 {
4030     out->filename = g_strdup(arg);
4031 
4032     return 0;
4033 }
4034 
4035 static int img_dd_skip(const char *arg,
4036                        struct DdIo *in, struct DdIo *out,
4037                        struct DdInfo *dd)
4038 {
4039     in->offset = cvtnum(arg);
4040 
4041     if (in->offset < 0) {
4042         error_report("invalid number: '%s'", arg);
4043         return 1;
4044     }
4045 
4046     return 0;
4047 }
4048 
4049 static int img_dd(int argc, char **argv)
4050 {
4051     int ret = 0;
4052     char *arg = NULL;
4053     char *tmp;
4054     BlockDriver *drv = NULL, *proto_drv = NULL;
4055     BlockBackend *blk1 = NULL, *blk2 = NULL;
4056     QemuOpts *opts = NULL;
4057     QemuOptsList *create_opts = NULL;
4058     Error *local_err = NULL;
4059     bool image_opts = false;
4060     int c, i;
4061     const char *out_fmt = "raw";
4062     const char *fmt = NULL;
4063     int64_t size = 0;
4064     int64_t block_count = 0, out_pos, in_pos;
4065     struct DdInfo dd = {
4066         .flags = 0,
4067         .count = 0,
4068     };
4069     struct DdIo in = {
4070         .bsz = 512, /* Block size is by default 512 bytes */
4071         .filename = NULL,
4072         .buf = NULL,
4073         .offset = 0
4074     };
4075     struct DdIo out = {
4076         .bsz = 512,
4077         .filename = NULL,
4078         .buf = NULL,
4079         .offset = 0
4080     };
4081 
4082     const struct DdOpts options[] = {
4083         { "bs", img_dd_bs, C_BS },
4084         { "count", img_dd_count, C_COUNT },
4085         { "if", img_dd_if, C_IF },
4086         { "of", img_dd_of, C_OF },
4087         { "skip", img_dd_skip, C_SKIP },
4088         { NULL, NULL, 0 }
4089     };
4090     const struct option long_options[] = {
4091         { "help", no_argument, 0, 'h'},
4092         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4093         { 0, 0, 0, 0 }
4094     };
4095 
4096     while ((c = getopt_long(argc, argv, "hf:O:", long_options, NULL))) {
4097         if (c == EOF) {
4098             break;
4099         }
4100         switch (c) {
4101         case 'O':
4102             out_fmt = optarg;
4103             break;
4104         case 'f':
4105             fmt = optarg;
4106             break;
4107         case '?':
4108             error_report("Try 'qemu-img --help' for more information.");
4109             ret = -1;
4110             goto out;
4111         case 'h':
4112             help();
4113             break;
4114         case OPTION_IMAGE_OPTS:
4115             image_opts = true;
4116             break;
4117         }
4118     }
4119 
4120     for (i = optind; i < argc; i++) {
4121         int j;
4122         arg = g_strdup(argv[i]);
4123 
4124         tmp = strchr(arg, '=');
4125         if (tmp == NULL) {
4126             error_report("unrecognized operand %s", arg);
4127             ret = -1;
4128             goto out;
4129         }
4130 
4131         *tmp++ = '\0';
4132 
4133         for (j = 0; options[j].name != NULL; j++) {
4134             if (!strcmp(arg, options[j].name)) {
4135                 break;
4136             }
4137         }
4138         if (options[j].name == NULL) {
4139             error_report("unrecognized operand %s", arg);
4140             ret = -1;
4141             goto out;
4142         }
4143 
4144         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4145             ret = -1;
4146             goto out;
4147         }
4148         dd.flags |= options[j].flag;
4149         g_free(arg);
4150         arg = NULL;
4151     }
4152 
4153     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4154         error_report("Must specify both input and output files");
4155         ret = -1;
4156         goto out;
4157     }
4158     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false);
4159 
4160     if (!blk1) {
4161         ret = -1;
4162         goto out;
4163     }
4164 
4165     drv = bdrv_find_format(out_fmt);
4166     if (!drv) {
4167         error_report("Unknown file format");
4168         ret = -1;
4169         goto out;
4170     }
4171     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4172 
4173     if (!proto_drv) {
4174         error_report_err(local_err);
4175         ret = -1;
4176         goto out;
4177     }
4178     if (!drv->create_opts) {
4179         error_report("Format driver '%s' does not support image creation",
4180                      drv->format_name);
4181         ret = -1;
4182         goto out;
4183     }
4184     if (!proto_drv->create_opts) {
4185         error_report("Protocol driver '%s' does not support image creation",
4186                      proto_drv->format_name);
4187         ret = -1;
4188         goto out;
4189     }
4190     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4191     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4192 
4193     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4194 
4195     size = blk_getlength(blk1);
4196     if (size < 0) {
4197         error_report("Failed to get size for '%s'", in.filename);
4198         ret = -1;
4199         goto out;
4200     }
4201 
4202     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4203         dd.count * in.bsz < size) {
4204         size = dd.count * in.bsz;
4205     }
4206 
4207     /* Overflow means the specified offset is beyond input image's size */
4208     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4209                               size < in.bsz * in.offset)) {
4210         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4211     } else {
4212         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4213                             size - in.bsz * in.offset, &error_abort);
4214     }
4215 
4216     ret = bdrv_create(drv, out.filename, opts, &local_err);
4217     if (ret < 0) {
4218         error_reportf_err(local_err,
4219                           "%s: error while creating output image: ",
4220                           out.filename);
4221         ret = -1;
4222         goto out;
4223     }
4224 
4225     blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR,
4226                     false, false);
4227 
4228     if (!blk2) {
4229         ret = -1;
4230         goto out;
4231     }
4232 
4233     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4234                               size < in.offset * in.bsz)) {
4235         /* We give a warning if the skip option is bigger than the input
4236          * size and create an empty output disk image (i.e. like dd(1)).
4237          */
4238         error_report("%s: cannot skip to specified offset", in.filename);
4239         in_pos = size;
4240     } else {
4241         in_pos = in.offset * in.bsz;
4242     }
4243 
4244     in.buf = g_new(uint8_t, in.bsz);
4245 
4246     for (out_pos = 0; in_pos < size; block_count++) {
4247         int in_ret, out_ret;
4248 
4249         if (in_pos + in.bsz > size) {
4250             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4251         } else {
4252             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4253         }
4254         if (in_ret < 0) {
4255             error_report("error while reading from input image file: %s",
4256                          strerror(-in_ret));
4257             ret = -1;
4258             goto out;
4259         }
4260         in_pos += in_ret;
4261 
4262         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4263 
4264         if (out_ret < 0) {
4265             error_report("error while writing to output image file: %s",
4266                          strerror(-out_ret));
4267             ret = -1;
4268             goto out;
4269         }
4270         out_pos += out_ret;
4271     }
4272 
4273 out:
4274     g_free(arg);
4275     qemu_opts_del(opts);
4276     qemu_opts_free(create_opts);
4277     blk_unref(blk1);
4278     blk_unref(blk2);
4279     g_free(in.filename);
4280     g_free(out.filename);
4281     g_free(in.buf);
4282     g_free(out.buf);
4283 
4284     if (ret) {
4285         return 1;
4286     }
4287     return 0;
4288 }
4289 
4290 
4291 static const img_cmd_t img_cmds[] = {
4292 #define DEF(option, callback, arg_string)        \
4293     { option, callback },
4294 #include "qemu-img-cmds.h"
4295 #undef DEF
4296 #undef GEN_DOCS
4297     { NULL, NULL, },
4298 };
4299 
4300 int main(int argc, char **argv)
4301 {
4302     const img_cmd_t *cmd;
4303     const char *cmdname;
4304     Error *local_error = NULL;
4305     char *trace_file = NULL;
4306     int c;
4307     static const struct option long_options[] = {
4308         {"help", no_argument, 0, 'h'},
4309         {"version", no_argument, 0, 'V'},
4310         {"trace", required_argument, NULL, 'T'},
4311         {0, 0, 0, 0}
4312     };
4313 
4314 #ifdef CONFIG_POSIX
4315     signal(SIGPIPE, SIG_IGN);
4316 #endif
4317 
4318     module_call_init(MODULE_INIT_TRACE);
4319     error_set_progname(argv[0]);
4320     qemu_init_exec_dir(argv[0]);
4321 
4322     if (qemu_init_main_loop(&local_error)) {
4323         error_report_err(local_error);
4324         exit(EXIT_FAILURE);
4325     }
4326 
4327     qcrypto_init(&error_fatal);
4328 
4329     module_call_init(MODULE_INIT_QOM);
4330     bdrv_init();
4331     if (argc < 2) {
4332         error_exit("Not enough arguments");
4333     }
4334 
4335     qemu_add_opts(&qemu_object_opts);
4336     qemu_add_opts(&qemu_source_opts);
4337     qemu_add_opts(&qemu_trace_opts);
4338 
4339     while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
4340         switch (c) {
4341         case 'h':
4342             help();
4343             return 0;
4344         case 'V':
4345             printf(QEMU_IMG_VERSION);
4346             return 0;
4347         case 'T':
4348             g_free(trace_file);
4349             trace_file = trace_opt_parse(optarg);
4350             break;
4351         }
4352     }
4353 
4354     cmdname = argv[optind];
4355 
4356     /* reset getopt_long scanning */
4357     argc -= optind;
4358     if (argc < 1) {
4359         return 0;
4360     }
4361     argv += optind;
4362     optind = 0;
4363 
4364     if (!trace_init_backends()) {
4365         exit(1);
4366     }
4367     trace_init_file(trace_file);
4368     qemu_set_log(LOG_TRACE);
4369 
4370     /* find the command */
4371     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
4372         if (!strcmp(cmdname, cmd->name)) {
4373             return cmd->handler(argc, argv);
4374         }
4375     }
4376 
4377     /* not found */
4378     error_exit("Command not found: %s", cmdname);
4379 }
4380