xref: /qemu/qemu-img.c (revision bf88c124)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qemu-version.h"
26 #include "qapi/error.h"
27 #include "qapi-visit.h"
28 #include "qapi/qobject-output-visitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qapi/qmp/qjson.h"
31 #include "qemu/cutils.h"
32 #include "qemu/config-file.h"
33 #include "qemu/option.h"
34 #include "qemu/error-report.h"
35 #include "qemu/log.h"
36 #include "qom/object_interfaces.h"
37 #include "sysemu/sysemu.h"
38 #include "sysemu/block-backend.h"
39 #include "block/block_int.h"
40 #include "block/blockjob.h"
41 #include "block/qapi.h"
42 #include "crypto/init.h"
43 #include "trace/control.h"
44 #include <getopt.h>
45 
/* Version and copyright banner; it forms the first part of the help text. */
#define QEMU_IMG_VERSION \
    "qemu-img version " QEMU_VERSION QEMU_PKGVERSION "\n" \
    QEMU_COPYRIGHT "\n"
48 
/* One qemu-img subcommand: its name and the function implementing it. */
struct img_cmd_t {
    /* Subcommand name */
    const char *name;
    /* Entry point; receives the argument vector and returns an int status */
    int (*handler)(int argc, char **argv);
};

typedef struct img_cmd_t img_cmd_t;
53 
/*
 * Codes used as the 'val' of long-only command line options in the
 * getopt_long() option tables. Numbering starts at 256, above any
 * single-character short option code, and increases by one per entry.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
};
63 
/* Selects how a command renders its report (see the '--output' option). */
typedef enum OutputFormat {
    OFORMAT_JSON  = 0,  /* machine-readable JSON */
    OFORMAT_HUMAN = 1,  /* plain text for humans */
} OutputFormat;
68 
/*
 * Cache mode used when the user does not pass one explicitly.
 * qemu-img does not need strong data integrity guarantees, so
 * cache=writeback is the default.
 */
#define BDRV_DEFAULT_CACHE "writeback"
71 
/*
 * Callback for bdrv_iterate_format(): prints one format name to stdout,
 * preceded by a single space.
 *
 * @opaque: unused callback context
 * @name:   format name to print
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;

    printf(" %s", name);
}
76 
77 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
78 {
79     va_list ap;
80 
81     error_printf("qemu-img: ");
82 
83     va_start(ap, fmt);
84     error_vprintf(fmt, ap);
85     va_end(ap);
86 
87     error_printf("\nTry 'qemu-img --help' for more information\n");
88     exit(EXIT_FAILURE);
89 }
90 
91 /* Please keep in synch with qemu-img.texi */
92 static void QEMU_NORETURN help(void)
93 {
94     const char *help_msg =
95            QEMU_IMG_VERSION
96            "usage: qemu-img [standard options] command [command options]\n"
97            "QEMU disk image utility\n"
98            "\n"
99            "    '-h', '--help'       display this help and exit\n"
100            "    '-V', '--version'    output version information and exit\n"
101            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
102            "                         specify tracing options\n"
103            "\n"
104            "Command syntax:\n"
105 #define DEF(option, callback, arg_string)        \
106            "  " arg_string "\n"
107 #include "qemu-img-cmds.h"
108 #undef DEF
109 #undef GEN_DOCS
110            "\n"
111            "Command parameters:\n"
112            "  'filename' is a disk image filename\n"
113            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
114            "    manual page for a description of the object properties. The most common\n"
115            "    object type is a 'secret', which is used to supply passwords and/or\n"
116            "    encryption keys.\n"
117            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
118            "  'cache' is the cache mode used to write the output disk image, the valid\n"
119            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
120            "    'directsync' and 'unsafe' (default for convert)\n"
121            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
122            "    options are the same as for the 'cache' option\n"
123            "  'size' is the disk image size in bytes. Optional suffixes\n"
124            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
125            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
126            "    supported. 'b' is ignored.\n"
127            "  'output_filename' is the destination disk image filename\n"
128            "  'output_fmt' is the destination format\n"
129            "  'options' is a comma separated list of format specific options in a\n"
130            "    name=value format. Use -o ? for an overview of the options supported by the\n"
131            "    used format\n"
132            "  'snapshot_param' is param used for internal snapshot, format\n"
133            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
134            "    '[ID_OR_NAME]'\n"
135            "  'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n"
136            "    instead\n"
137            "  '-c' indicates that target image must be compressed (qcow format only)\n"
138            "  '-u' enables unsafe rebasing. It is assumed that old and new backing file\n"
139            "       match exactly. The image doesn't need a working backing file before\n"
140            "       rebasing in this case (useful for renaming the backing file)\n"
141            "  '-h' with or without a command shows this help and lists the supported formats\n"
142            "  '-p' show progress of command (only certain commands)\n"
143            "  '-q' use Quiet mode - do not print any output (except errors)\n"
144            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
145            "       contain only zeros for qemu-img to create a sparse image during\n"
146            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
147            "       unallocated or zero sectors, and the destination image will always be\n"
148            "       fully allocated\n"
149            "  '--output' takes the format in which the output must be done (human or json)\n"
150            "  '-n' skips the target volume creation (useful if the volume is created\n"
151            "       prior to running qemu-img)\n"
152            "\n"
153            "Parameters to check subcommand:\n"
154            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
155            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
156            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
157            "       hiding corruption that has already occurred.\n"
158            "\n"
159            "Parameters to snapshot subcommand:\n"
160            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
161            "  '-a' applies a snapshot (revert disk to saved state)\n"
162            "  '-c' creates a snapshot\n"
163            "  '-d' deletes a snapshot\n"
164            "  '-l' lists all snapshots in the given image\n"
165            "\n"
166            "Parameters to compare subcommand:\n"
167            "  '-f' first image format\n"
168            "  '-F' second image format\n"
169            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
170            "\n"
171            "Parameters to dd subcommand:\n"
172            "  'bs=BYTES' read and write up to BYTES bytes at a time "
173            "(default: 512)\n"
174            "  'count=N' copy only N input blocks\n"
175            "  'if=FILE' read from FILE\n"
176            "  'of=FILE' write to FILE\n"
177            "  'skip=N' skip N bs-sized blocks at the start of input\n";
178 
179     printf("%s\nSupported formats:", help_msg);
180     bdrv_iterate_format(format_print, NULL);
181     printf("\n");
182     exit(EXIT_SUCCESS);
183 }
184 
185 static QemuOptsList qemu_object_opts = {
186     .name = "object",
187     .implied_opt_name = "qom-type",
188     .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
189     .desc = {
190         { }
191     },
192 };
193 
194 static QemuOptsList qemu_source_opts = {
195     .name = "source",
196     .implied_opt_name = "file",
197     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
198     .desc = {
199         { }
200     },
201 };
202 
203 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
204 {
205     int ret = 0;
206     if (!quiet) {
207         va_list args;
208         va_start(args, fmt);
209         ret = vprintf(fmt, args);
210         va_end(args);
211     }
212     return ret;
213 }
214 
215 
216 static int print_block_option_help(const char *filename, const char *fmt)
217 {
218     BlockDriver *drv, *proto_drv;
219     QemuOptsList *create_opts = NULL;
220     Error *local_err = NULL;
221 
222     /* Find driver and parse its options */
223     drv = bdrv_find_format(fmt);
224     if (!drv) {
225         error_report("Unknown file format '%s'", fmt);
226         return 1;
227     }
228 
229     create_opts = qemu_opts_append(create_opts, drv->create_opts);
230     if (filename) {
231         proto_drv = bdrv_find_protocol(filename, true, &local_err);
232         if (!proto_drv) {
233             error_report_err(local_err);
234             qemu_opts_free(create_opts);
235             return 1;
236         }
237         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
238     }
239 
240     qemu_opts_print_help(create_opts);
241     qemu_opts_free(create_opts);
242     return 0;
243 }
244 
245 
246 static int img_open_password(BlockBackend *blk, const char *filename,
247                              int flags, bool quiet)
248 {
249     BlockDriverState *bs;
250     char password[256];
251 
252     bs = blk_bs(blk);
253     if (bdrv_is_encrypted(bs) && bdrv_key_required(bs) &&
254         !(flags & BDRV_O_NO_IO)) {
255         qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
256         if (qemu_read_password(password, sizeof(password)) < 0) {
257             error_report("No password given");
258             return -1;
259         }
260         if (bdrv_set_key(bs, password) < 0) {
261             error_report("invalid password");
262             return -1;
263         }
264     }
265     return 0;
266 }
267 
268 
269 static BlockBackend *img_open_opts(const char *optstr,
270                                    QemuOpts *opts, int flags, bool writethrough,
271                                    bool quiet)
272 {
273     QDict *options;
274     Error *local_err = NULL;
275     BlockBackend *blk;
276     options = qemu_opts_to_qdict(opts, NULL);
277     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
278     if (!blk) {
279         error_reportf_err(local_err, "Could not open '%s': ", optstr);
280         return NULL;
281     }
282     blk_set_enable_write_cache(blk, !writethrough);
283 
284     if (img_open_password(blk, optstr, flags, quiet) < 0) {
285         blk_unref(blk);
286         return NULL;
287     }
288     return blk;
289 }
290 
291 static BlockBackend *img_open_file(const char *filename,
292                                    const char *fmt, int flags,
293                                    bool writethrough, bool quiet)
294 {
295     BlockBackend *blk;
296     Error *local_err = NULL;
297     QDict *options = NULL;
298 
299     if (fmt) {
300         options = qdict_new();
301         qdict_put(options, "driver", qstring_from_str(fmt));
302     }
303 
304     blk = blk_new_open(filename, NULL, options, flags, &local_err);
305     if (!blk) {
306         error_reportf_err(local_err, "Could not open '%s': ", filename);
307         return NULL;
308     }
309     blk_set_enable_write_cache(blk, !writethrough);
310 
311     if (img_open_password(blk, filename, flags, quiet) < 0) {
312         blk_unref(blk);
313         return NULL;
314     }
315     return blk;
316 }
317 
318 
319 static BlockBackend *img_open(bool image_opts,
320                               const char *filename,
321                               const char *fmt, int flags, bool writethrough,
322                               bool quiet)
323 {
324     BlockBackend *blk;
325     if (image_opts) {
326         QemuOpts *opts;
327         if (fmt) {
328             error_report("--image-opts and --format are mutually exclusive");
329             return NULL;
330         }
331         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
332                                        filename, true);
333         if (!opts) {
334             return NULL;
335         }
336         blk = img_open_opts(filename, opts, flags, writethrough, quiet);
337     } else {
338         blk = img_open_file(filename, fmt, flags, writethrough, quiet);
339     }
340     return blk;
341 }
342 
343 
344 static int add_old_style_options(const char *fmt, QemuOpts *opts,
345                                  const char *base_filename,
346                                  const char *base_fmt)
347 {
348     Error *err = NULL;
349 
350     if (base_filename) {
351         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
352         if (err) {
353             error_report("Backing file not supported for file format '%s'",
354                          fmt);
355             error_free(err);
356             return -1;
357         }
358     }
359     if (base_fmt) {
360         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
361         if (err) {
362             error_report("Backing file format not supported for file "
363                          "format '%s'", fmt);
364             error_free(err);
365             return -1;
366         }
367     }
368     return 0;
369 }
370 
371 static int img_create(int argc, char **argv)
372 {
373     int c;
374     uint64_t img_size = -1;
375     const char *fmt = "raw";
376     const char *base_fmt = NULL;
377     const char *filename;
378     const char *base_filename = NULL;
379     char *options = NULL;
380     Error *local_err = NULL;
381     bool quiet = false;
382 
383     for(;;) {
384         static const struct option long_options[] = {
385             {"help", no_argument, 0, 'h'},
386             {"object", required_argument, 0, OPTION_OBJECT},
387             {0, 0, 0, 0}
388         };
389         c = getopt_long(argc, argv, "F:b:f:he6o:q",
390                         long_options, NULL);
391         if (c == -1) {
392             break;
393         }
394         switch(c) {
395         case '?':
396         case 'h':
397             help();
398             break;
399         case 'F':
400             base_fmt = optarg;
401             break;
402         case 'b':
403             base_filename = optarg;
404             break;
405         case 'f':
406             fmt = optarg;
407             break;
408         case 'e':
409             error_report("option -e is deprecated, please use \'-o "
410                   "encryption\' instead!");
411             goto fail;
412         case '6':
413             error_report("option -6 is deprecated, please use \'-o "
414                   "compat6\' instead!");
415             goto fail;
416         case 'o':
417             if (!is_valid_option_list(optarg)) {
418                 error_report("Invalid option list: %s", optarg);
419                 goto fail;
420             }
421             if (!options) {
422                 options = g_strdup(optarg);
423             } else {
424                 char *old_options = options;
425                 options = g_strdup_printf("%s,%s", options, optarg);
426                 g_free(old_options);
427             }
428             break;
429         case 'q':
430             quiet = true;
431             break;
432         case OPTION_OBJECT: {
433             QemuOpts *opts;
434             opts = qemu_opts_parse_noisily(&qemu_object_opts,
435                                            optarg, true);
436             if (!opts) {
437                 goto fail;
438             }
439         }   break;
440         }
441     }
442 
443     /* Get the filename */
444     filename = (optind < argc) ? argv[optind] : NULL;
445     if (options && has_help_option(options)) {
446         g_free(options);
447         return print_block_option_help(filename, fmt);
448     }
449 
450     if (optind >= argc) {
451         error_exit("Expecting image file name");
452     }
453     optind++;
454 
455     if (qemu_opts_foreach(&qemu_object_opts,
456                           user_creatable_add_opts_foreach,
457                           NULL, NULL)) {
458         goto fail;
459     }
460 
461     /* Get image size, if specified */
462     if (optind < argc) {
463         int64_t sval;
464         char *end;
465         sval = qemu_strtosz_suffix(argv[optind++], &end,
466                                    QEMU_STRTOSZ_DEFSUFFIX_B);
467         if (sval < 0 || *end) {
468             if (sval == -ERANGE) {
469                 error_report("Image size must be less than 8 EiB!");
470             } else {
471                 error_report("Invalid image size specified! You may use k, M, "
472                       "G, T, P or E suffixes for ");
473                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
474                              "petabytes and exabytes.");
475             }
476             goto fail;
477         }
478         img_size = (uint64_t)sval;
479     }
480     if (optind != argc) {
481         error_exit("Unexpected argument: %s", argv[optind]);
482     }
483 
484     bdrv_img_create(filename, fmt, base_filename, base_fmt,
485                     options, img_size, 0, &local_err, quiet);
486     if (local_err) {
487         error_reportf_err(local_err, "%s: ", filename);
488         goto fail;
489     }
490 
491     g_free(options);
492     return 0;
493 
494 fail:
495     g_free(options);
496     return 1;
497 }
498 
499 static void dump_json_image_check(ImageCheck *check, bool quiet)
500 {
501     QString *str;
502     QObject *obj;
503     Visitor *v = qobject_output_visitor_new(&obj);
504 
505     visit_type_ImageCheck(v, NULL, &check, &error_abort);
506     visit_complete(v, &obj);
507     str = qobject_to_json_pretty(obj);
508     assert(str != NULL);
509     qprintf(quiet, "%s\n", qstring_get_str(str));
510     qobject_decref(obj);
511     visit_free(v);
512     QDECREF(str);
513 }
514 
515 static void dump_human_image_check(ImageCheck *check, bool quiet)
516 {
517     if (!(check->corruptions || check->leaks || check->check_errors)) {
518         qprintf(quiet, "No errors were found on the image.\n");
519     } else {
520         if (check->corruptions) {
521             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
522                     "Data may be corrupted, or further writes to the image "
523                     "may corrupt it.\n",
524                     check->corruptions);
525         }
526 
527         if (check->leaks) {
528             qprintf(quiet,
529                     "\n%" PRId64 " leaked clusters were found on the image.\n"
530                     "This means waste of disk space, but no harm to data.\n",
531                     check->leaks);
532         }
533 
534         if (check->check_errors) {
535             qprintf(quiet,
536                     "\n%" PRId64
537                     " internal errors have occurred during the check.\n",
538                     check->check_errors);
539         }
540     }
541 
542     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
543         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
544                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
545                 check->allocated_clusters, check->total_clusters,
546                 check->allocated_clusters * 100.0 / check->total_clusters,
547                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
548                 check->compressed_clusters * 100.0 /
549                 check->allocated_clusters);
550     }
551 
552     if (check->image_end_offset) {
553         qprintf(quiet,
554                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
555     }
556 }
557 
558 static int collect_image_check(BlockDriverState *bs,
559                    ImageCheck *check,
560                    const char *filename,
561                    const char *fmt,
562                    int fix)
563 {
564     int ret;
565     BdrvCheckResult result;
566 
567     ret = bdrv_check(bs, &result, fix);
568     if (ret < 0) {
569         return ret;
570     }
571 
572     check->filename                 = g_strdup(filename);
573     check->format                   = g_strdup(bdrv_get_format_name(bs));
574     check->check_errors             = result.check_errors;
575     check->corruptions              = result.corruptions;
576     check->has_corruptions          = result.corruptions != 0;
577     check->leaks                    = result.leaks;
578     check->has_leaks                = result.leaks != 0;
579     check->corruptions_fixed        = result.corruptions_fixed;
580     check->has_corruptions_fixed    = result.corruptions != 0;
581     check->leaks_fixed              = result.leaks_fixed;
582     check->has_leaks_fixed          = result.leaks != 0;
583     check->image_end_offset         = result.image_end_offset;
584     check->has_image_end_offset     = result.image_end_offset != 0;
585     check->total_clusters           = result.bfi.total_clusters;
586     check->has_total_clusters       = result.bfi.total_clusters != 0;
587     check->allocated_clusters       = result.bfi.allocated_clusters;
588     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
589     check->fragmented_clusters      = result.bfi.fragmented_clusters;
590     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
591     check->compressed_clusters      = result.bfi.compressed_clusters;
592     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
593 
594     return 0;
595 }
596 
597 /*
598  * Checks an image for consistency. Exit codes:
599  *
600  *  0 - Check completed, image is good
601  *  1 - Check not completed because of internal errors
602  *  2 - Check completed, image is corrupted
603  *  3 - Check completed, image has leaked clusters, but is good otherwise
604  * 63 - Checks are not supported by the image format
605  */
606 static int img_check(int argc, char **argv)
607 {
608     int c, ret;
609     OutputFormat output_format = OFORMAT_HUMAN;
610     const char *filename, *fmt, *output, *cache;
611     BlockBackend *blk;
612     BlockDriverState *bs;
613     int fix = 0;
614     int flags = BDRV_O_CHECK;
615     bool writethrough;
616     ImageCheck *check;
617     bool quiet = false;
618     bool image_opts = false;
619 
620     fmt = NULL;
621     output = NULL;
622     cache = BDRV_DEFAULT_CACHE;
623 
624     for(;;) {
625         int option_index = 0;
626         static const struct option long_options[] = {
627             {"help", no_argument, 0, 'h'},
628             {"format", required_argument, 0, 'f'},
629             {"repair", required_argument, 0, 'r'},
630             {"output", required_argument, 0, OPTION_OUTPUT},
631             {"object", required_argument, 0, OPTION_OBJECT},
632             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
633             {0, 0, 0, 0}
634         };
635         c = getopt_long(argc, argv, "hf:r:T:q",
636                         long_options, &option_index);
637         if (c == -1) {
638             break;
639         }
640         switch(c) {
641         case '?':
642         case 'h':
643             help();
644             break;
645         case 'f':
646             fmt = optarg;
647             break;
648         case 'r':
649             flags |= BDRV_O_RDWR;
650 
651             if (!strcmp(optarg, "leaks")) {
652                 fix = BDRV_FIX_LEAKS;
653             } else if (!strcmp(optarg, "all")) {
654                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
655             } else {
656                 error_exit("Unknown option value for -r "
657                            "(expecting 'leaks' or 'all'): %s", optarg);
658             }
659             break;
660         case OPTION_OUTPUT:
661             output = optarg;
662             break;
663         case 'T':
664             cache = optarg;
665             break;
666         case 'q':
667             quiet = true;
668             break;
669         case OPTION_OBJECT: {
670             QemuOpts *opts;
671             opts = qemu_opts_parse_noisily(&qemu_object_opts,
672                                            optarg, true);
673             if (!opts) {
674                 return 1;
675             }
676         }   break;
677         case OPTION_IMAGE_OPTS:
678             image_opts = true;
679             break;
680         }
681     }
682     if (optind != argc - 1) {
683         error_exit("Expecting one image file name");
684     }
685     filename = argv[optind++];
686 
687     if (output && !strcmp(output, "json")) {
688         output_format = OFORMAT_JSON;
689     } else if (output && !strcmp(output, "human")) {
690         output_format = OFORMAT_HUMAN;
691     } else if (output) {
692         error_report("--output must be used with human or json as argument.");
693         return 1;
694     }
695 
696     if (qemu_opts_foreach(&qemu_object_opts,
697                           user_creatable_add_opts_foreach,
698                           NULL, NULL)) {
699         return 1;
700     }
701 
702     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
703     if (ret < 0) {
704         error_report("Invalid source cache option: %s", cache);
705         return 1;
706     }
707 
708     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
709     if (!blk) {
710         return 1;
711     }
712     bs = blk_bs(blk);
713 
714     check = g_new0(ImageCheck, 1);
715     ret = collect_image_check(bs, check, filename, fmt, fix);
716 
717     if (ret == -ENOTSUP) {
718         error_report("This image format does not support checks");
719         ret = 63;
720         goto fail;
721     }
722 
723     if (check->corruptions_fixed || check->leaks_fixed) {
724         int corruptions_fixed, leaks_fixed;
725 
726         leaks_fixed         = check->leaks_fixed;
727         corruptions_fixed   = check->corruptions_fixed;
728 
729         if (output_format == OFORMAT_HUMAN) {
730             qprintf(quiet,
731                     "The following inconsistencies were found and repaired:\n\n"
732                     "    %" PRId64 " leaked clusters\n"
733                     "    %" PRId64 " corruptions\n\n"
734                     "Double checking the fixed image now...\n",
735                     check->leaks_fixed,
736                     check->corruptions_fixed);
737         }
738 
739         ret = collect_image_check(bs, check, filename, fmt, 0);
740 
741         check->leaks_fixed          = leaks_fixed;
742         check->corruptions_fixed    = corruptions_fixed;
743     }
744 
745     if (!ret) {
746         switch (output_format) {
747         case OFORMAT_HUMAN:
748             dump_human_image_check(check, quiet);
749             break;
750         case OFORMAT_JSON:
751             dump_json_image_check(check, quiet);
752             break;
753         }
754     }
755 
756     if (ret || check->check_errors) {
757         if (ret) {
758             error_report("Check failed: %s", strerror(-ret));
759         } else {
760             error_report("Check failed");
761         }
762         ret = 1;
763         goto fail;
764     }
765 
766     if (check->corruptions) {
767         ret = 2;
768     } else if (check->leaks) {
769         ret = 3;
770     } else {
771         ret = 0;
772     }
773 
774 fail:
775     qapi_free_ImageCheck(check);
776     blk_unref(blk);
777     return ret;
778 }
779 
780 typedef struct CommonBlockJobCBInfo {
781     BlockDriverState *bs;
782     Error **errp;
783 } CommonBlockJobCBInfo;
784 
785 static void common_block_job_cb(void *opaque, int ret)
786 {
787     CommonBlockJobCBInfo *cbi = opaque;
788 
789     if (ret < 0) {
790         error_setg_errno(cbi->errp, -ret, "Block job failed");
791     }
792 }
793 
794 static void run_block_job(BlockJob *job, Error **errp)
795 {
796     AioContext *aio_context = blk_get_aio_context(job->blk);
797 
798     aio_context_acquire(aio_context);
799     do {
800         aio_poll(aio_context, true);
801         qemu_progress_print(job->len ?
802                             ((float)job->offset / job->len * 100.f) : 0.0f, 0);
803     } while (!job->ready);
804 
805     block_job_complete_sync(job, errp);
806     aio_context_release(aio_context);
807 
808     /* A block job may finish instantaneously without publishing any progress,
809      * so just signal completion here */
810     qemu_progress_print(100.f, 0);
811 }
812 
813 static int img_commit(int argc, char **argv)
814 {
815     int c, ret, flags;
816     const char *filename, *fmt, *cache, *base;
817     BlockBackend *blk;
818     BlockDriverState *bs, *base_bs;
819     bool progress = false, quiet = false, drop = false;
820     bool writethrough;
821     Error *local_err = NULL;
822     CommonBlockJobCBInfo cbi;
823     bool image_opts = false;
824     AioContext *aio_context;
825 
826     fmt = NULL;
827     cache = BDRV_DEFAULT_CACHE;
828     base = NULL;
829     for(;;) {
830         static const struct option long_options[] = {
831             {"help", no_argument, 0, 'h'},
832             {"object", required_argument, 0, OPTION_OBJECT},
833             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
834             {0, 0, 0, 0}
835         };
836         c = getopt_long(argc, argv, "f:ht:b:dpq",
837                         long_options, NULL);
838         if (c == -1) {
839             break;
840         }
841         switch(c) {
842         case '?':
843         case 'h':
844             help();
845             break;
846         case 'f':
847             fmt = optarg;
848             break;
849         case 't':
850             cache = optarg;
851             break;
852         case 'b':
853             base = optarg;
854             /* -b implies -d */
855             drop = true;
856             break;
857         case 'd':
858             drop = true;
859             break;
860         case 'p':
861             progress = true;
862             break;
863         case 'q':
864             quiet = true;
865             break;
866         case OPTION_OBJECT: {
867             QemuOpts *opts;
868             opts = qemu_opts_parse_noisily(&qemu_object_opts,
869                                            optarg, true);
870             if (!opts) {
871                 return 1;
872             }
873         }   break;
874         case OPTION_IMAGE_OPTS:
875             image_opts = true;
876             break;
877         }
878     }
879 
880     /* Progress is not shown in Quiet mode */
881     if (quiet) {
882         progress = false;
883     }
884 
885     if (optind != argc - 1) {
886         error_exit("Expecting one image file name");
887     }
888     filename = argv[optind++];
889 
890     if (qemu_opts_foreach(&qemu_object_opts,
891                           user_creatable_add_opts_foreach,
892                           NULL, NULL)) {
893         return 1;
894     }
895 
896     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
897     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
898     if (ret < 0) {
899         error_report("Invalid cache option: %s", cache);
900         return 1;
901     }
902 
903     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
904     if (!blk) {
905         return 1;
906     }
907     bs = blk_bs(blk);
908 
909     qemu_progress_init(progress, 1.f);
910     qemu_progress_print(0.f, 100);
911 
912     if (base) {
913         base_bs = bdrv_find_backing_image(bs, base);
914         if (!base_bs) {
915             error_setg(&local_err,
916                        "Did not find '%s' in the backing chain of '%s'",
917                        base, filename);
918             goto done;
919         }
920     } else {
921         /* This is different from QMP, which by default uses the deepest file in
922          * the backing chain (i.e., the very base); however, the traditional
923          * behavior of qemu-img commit is using the immediate backing file. */
924         base_bs = backing_bs(bs);
925         if (!base_bs) {
926             error_setg(&local_err, "Image does not have a backing file");
927             goto done;
928         }
929     }
930 
931     cbi = (CommonBlockJobCBInfo){
932         .errp = &local_err,
933         .bs   = bs,
934     };
935 
936     aio_context = bdrv_get_aio_context(bs);
937     aio_context_acquire(aio_context);
938     commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
939                         BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
940                         &local_err, false);
941     aio_context_release(aio_context);
942     if (local_err) {
943         goto done;
944     }
945 
946     /* When the block job completes, the BlockBackend reference will point to
947      * the old backing file. In order to avoid that the top image is already
948      * deleted, so we can still empty it afterwards, increment the reference
949      * counter here preemptively. */
950     if (!drop) {
951         bdrv_ref(bs);
952     }
953 
954     run_block_job(bs->job, &local_err);
955     if (local_err) {
956         goto unref_backing;
957     }
958 
959     if (!drop && bs->drv->bdrv_make_empty) {
960         ret = bs->drv->bdrv_make_empty(bs);
961         if (ret) {
962             error_setg_errno(&local_err, -ret, "Could not empty %s",
963                              filename);
964             goto unref_backing;
965         }
966     }
967 
968 unref_backing:
969     if (!drop) {
970         bdrv_unref(bs);
971     }
972 
973 done:
974     qemu_progress_end();
975 
976     blk_unref(blk);
977 
978     if (local_err) {
979         error_report_err(local_err);
980         return 1;
981     }
982 
983     qprintf(quiet, "Image committed.\n");
984     return 0;
985 }
986 
/*
 * Classifies the run of 512-byte sectors at the start of 'buf'.
 *
 * Returns 1 if the first sector contains at least one non-NUL byte, and 0 if
 * it is completely zero or if 'n' is not positive.
 *
 * 'pnum' receives the length (in sectors, counting the first one) of the run
 * of consecutive sectors that share the zero/non-zero state of the first
 * sector. It is set to 0 when 'n' is not positive.
 */
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
{
    bool first_is_zero;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_is_zero = buffer_is_zero(buf, 512);

    /* Extend the run until a sector in the opposite state shows up */
    run = 1;
    while (run < n &&
           buffer_is_zero(buf + (size_t)run * 512, 512) == first_is_zero) {
        run++;
    }

    *pnum = run;
    return first_is_zero ? 0 : 1;
}
1013 
1014 /*
1015  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1016  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1017  * breaking up write requests for only small sparse areas.
1018  */
1019 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1020     int min)
1021 {
1022     int ret;
1023     int num_checked, num_used;
1024 
1025     if (n < min) {
1026         min = n;
1027     }
1028 
1029     ret = is_allocated_sectors(buf, n, pnum);
1030     if (!ret) {
1031         return ret;
1032     }
1033 
1034     num_used = *pnum;
1035     buf += BDRV_SECTOR_SIZE * *pnum;
1036     n -= *pnum;
1037     num_checked = num_used;
1038 
1039     while (n > 0) {
1040         ret = is_allocated_sectors(buf, n, pnum);
1041 
1042         buf += BDRV_SECTOR_SIZE * *pnum;
1043         n -= *pnum;
1044         num_checked += *pnum;
1045         if (ret) {
1046             num_used = num_checked;
1047         } else if (*pnum >= min) {
1048             break;
1049         }
1050     }
1051 
1052     *pnum = num_used;
1053     return 1;
1054 }
1055 
/*
 * Compares two buffers in units of 512-byte sectors.
 *
 * Returns 0 if the first sector of both buffers is identical (or if 'n' is
 * not positive) and 1 if it differs.
 *
 * 'pnum' receives the number of consecutive sectors, counting the first one,
 * that yield the same comparison result as the first sector. It is set to 0
 * when 'n' is not positive.
 */
static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
    int *pnum)
{
    bool first_differs;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_differs = memcmp(buf1, buf2, 512) != 0;

    for (run = 1; run < n; run++) {
        size_t offset = (size_t)run * 512;
        bool differs = memcmp(buf1 + offset, buf2 + offset, 512) != 0;

        if (differs != first_differs) {
            break;
        }
    }

    *pnum = run;
    return first_differs;
}
1087 
1088 #define IO_BUF_SIZE (2 * 1024 * 1024)
1089 
1090 static int64_t sectors_to_bytes(int64_t sectors)
1091 {
1092     return sectors << BDRV_SECTOR_BITS;
1093 }
1094 
1095 static int64_t sectors_to_process(int64_t total, int64_t from)
1096 {
1097     return MIN(total - from, IO_BUF_SIZE >> BDRV_SECTOR_BITS);
1098 }
1099 
1100 /*
1101  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1102  *
1103  * Returns 0 in case sectors are filled with 0, 1 if sectors contain non-zero
1104  * data and negative value on error.
1105  *
1106  * @param blk:  BlockBackend for the image
1107  * @param sect_num: Number of first sector to check
1108  * @param sect_count: Number of sectors to check
1109  * @param filename: Name of disk file we are checking (logging purpose)
1110  * @param buffer: Allocated buffer for storing read data
1111  * @param quiet: Flag for quiet mode
1112  */
1113 static int check_empty_sectors(BlockBackend *blk, int64_t sect_num,
1114                                int sect_count, const char *filename,
1115                                uint8_t *buffer, bool quiet)
1116 {
1117     int pnum, ret = 0;
1118     ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer,
1119                     sect_count << BDRV_SECTOR_BITS);
1120     if (ret < 0) {
1121         error_report("Error while reading offset %" PRId64 " of %s: %s",
1122                      sectors_to_bytes(sect_num), filename, strerror(-ret));
1123         return ret;
1124     }
1125     ret = is_allocated_sectors(buffer, sect_count, &pnum);
1126     if (ret || pnum != sect_count) {
1127         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1128                 sectors_to_bytes(ret ? sect_num : sect_num + pnum));
1129         return 1;
1130     }
1131 
1132     return 0;
1133 }
1134 
1135 /*
1136  * Compares two images. Exit codes:
1137  *
1138  * 0 - Images are identical
1139  * 1 - Images differ
1140  * >1 - Error occurred
1141  */
1142 static int img_compare(int argc, char **argv)
1143 {
1144     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1145     BlockBackend *blk1, *blk2;
1146     BlockDriverState *bs1, *bs2;
1147     int64_t total_sectors1, total_sectors2;
1148     uint8_t *buf1 = NULL, *buf2 = NULL;
1149     int pnum1, pnum2;
1150     int allocated1, allocated2;
1151     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1152     bool progress = false, quiet = false, strict = false;
1153     int flags;
1154     bool writethrough;
1155     int64_t total_sectors;
1156     int64_t sector_num = 0;
1157     int64_t nb_sectors;
1158     int c, pnum;
1159     uint64_t progress_base;
1160     bool image_opts = false;
1161 
1162     cache = BDRV_DEFAULT_CACHE;
1163     for (;;) {
1164         static const struct option long_options[] = {
1165             {"help", no_argument, 0, 'h'},
1166             {"object", required_argument, 0, OPTION_OBJECT},
1167             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1168             {0, 0, 0, 0}
1169         };
1170         c = getopt_long(argc, argv, "hf:F:T:pqs",
1171                         long_options, NULL);
1172         if (c == -1) {
1173             break;
1174         }
1175         switch (c) {
1176         case '?':
1177         case 'h':
1178             help();
1179             break;
1180         case 'f':
1181             fmt1 = optarg;
1182             break;
1183         case 'F':
1184             fmt2 = optarg;
1185             break;
1186         case 'T':
1187             cache = optarg;
1188             break;
1189         case 'p':
1190             progress = true;
1191             break;
1192         case 'q':
1193             quiet = true;
1194             break;
1195         case 's':
1196             strict = true;
1197             break;
1198         case OPTION_OBJECT: {
1199             QemuOpts *opts;
1200             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1201                                            optarg, true);
1202             if (!opts) {
1203                 ret = 2;
1204                 goto out4;
1205             }
1206         }   break;
1207         case OPTION_IMAGE_OPTS:
1208             image_opts = true;
1209             break;
1210         }
1211     }
1212 
1213     /* Progress is not shown in Quiet mode */
1214     if (quiet) {
1215         progress = false;
1216     }
1217 
1218 
1219     if (optind != argc - 2) {
1220         error_exit("Expecting two image file names");
1221     }
1222     filename1 = argv[optind++];
1223     filename2 = argv[optind++];
1224 
1225     if (qemu_opts_foreach(&qemu_object_opts,
1226                           user_creatable_add_opts_foreach,
1227                           NULL, NULL)) {
1228         ret = 2;
1229         goto out4;
1230     }
1231 
1232     /* Initialize before goto out */
1233     qemu_progress_init(progress, 2.0);
1234 
1235     flags = 0;
1236     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1237     if (ret < 0) {
1238         error_report("Invalid source cache option: %s", cache);
1239         ret = 2;
1240         goto out3;
1241     }
1242 
1243     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet);
1244     if (!blk1) {
1245         ret = 2;
1246         goto out3;
1247     }
1248 
1249     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet);
1250     if (!blk2) {
1251         ret = 2;
1252         goto out2;
1253     }
1254     bs1 = blk_bs(blk1);
1255     bs2 = blk_bs(blk2);
1256 
1257     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1258     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1259     total_sectors1 = blk_nb_sectors(blk1);
1260     if (total_sectors1 < 0) {
1261         error_report("Can't get size of %s: %s",
1262                      filename1, strerror(-total_sectors1));
1263         ret = 4;
1264         goto out;
1265     }
1266     total_sectors2 = blk_nb_sectors(blk2);
1267     if (total_sectors2 < 0) {
1268         error_report("Can't get size of %s: %s",
1269                      filename2, strerror(-total_sectors2));
1270         ret = 4;
1271         goto out;
1272     }
1273     total_sectors = MIN(total_sectors1, total_sectors2);
1274     progress_base = MAX(total_sectors1, total_sectors2);
1275 
1276     qemu_progress_print(0, 100);
1277 
1278     if (strict && total_sectors1 != total_sectors2) {
1279         ret = 1;
1280         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1281         goto out;
1282     }
1283 
1284     for (;;) {
1285         int64_t status1, status2;
1286         BlockDriverState *file;
1287 
1288         nb_sectors = sectors_to_process(total_sectors, sector_num);
1289         if (nb_sectors <= 0) {
1290             break;
1291         }
1292         status1 = bdrv_get_block_status_above(bs1, NULL, sector_num,
1293                                               total_sectors1 - sector_num,
1294                                               &pnum1, &file);
1295         if (status1 < 0) {
1296             ret = 3;
1297             error_report("Sector allocation test failed for %s", filename1);
1298             goto out;
1299         }
1300         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1301 
1302         status2 = bdrv_get_block_status_above(bs2, NULL, sector_num,
1303                                               total_sectors2 - sector_num,
1304                                               &pnum2, &file);
1305         if (status2 < 0) {
1306             ret = 3;
1307             error_report("Sector allocation test failed for %s", filename2);
1308             goto out;
1309         }
1310         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1311         if (pnum1) {
1312             nb_sectors = MIN(nb_sectors, pnum1);
1313         }
1314         if (pnum2) {
1315             nb_sectors = MIN(nb_sectors, pnum2);
1316         }
1317 
1318         if (strict) {
1319             if ((status1 & ~BDRV_BLOCK_OFFSET_MASK) !=
1320                 (status2 & ~BDRV_BLOCK_OFFSET_MASK)) {
1321                 ret = 1;
1322                 qprintf(quiet, "Strict mode: Offset %" PRId64
1323                         " block status mismatch!\n",
1324                         sectors_to_bytes(sector_num));
1325                 goto out;
1326             }
1327         }
1328         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1329             nb_sectors = MIN(pnum1, pnum2);
1330         } else if (allocated1 == allocated2) {
1331             if (allocated1) {
1332                 ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1,
1333                                 nb_sectors << BDRV_SECTOR_BITS);
1334                 if (ret < 0) {
1335                     error_report("Error while reading offset %" PRId64 " of %s:"
1336                                  " %s", sectors_to_bytes(sector_num), filename1,
1337                                  strerror(-ret));
1338                     ret = 4;
1339                     goto out;
1340                 }
1341                 ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2,
1342                                 nb_sectors << BDRV_SECTOR_BITS);
1343                 if (ret < 0) {
1344                     error_report("Error while reading offset %" PRId64
1345                                  " of %s: %s", sectors_to_bytes(sector_num),
1346                                  filename2, strerror(-ret));
1347                     ret = 4;
1348                     goto out;
1349                 }
1350                 ret = compare_sectors(buf1, buf2, nb_sectors, &pnum);
1351                 if (ret || pnum != nb_sectors) {
1352                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1353                             sectors_to_bytes(
1354                                 ret ? sector_num : sector_num + pnum));
1355                     ret = 1;
1356                     goto out;
1357                 }
1358             }
1359         } else {
1360 
1361             if (allocated1) {
1362                 ret = check_empty_sectors(blk1, sector_num, nb_sectors,
1363                                           filename1, buf1, quiet);
1364             } else {
1365                 ret = check_empty_sectors(blk2, sector_num, nb_sectors,
1366                                           filename2, buf1, quiet);
1367             }
1368             if (ret) {
1369                 if (ret < 0) {
1370                     error_report("Error while reading offset %" PRId64 ": %s",
1371                                  sectors_to_bytes(sector_num), strerror(-ret));
1372                     ret = 4;
1373                 }
1374                 goto out;
1375             }
1376         }
1377         sector_num += nb_sectors;
1378         qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1379     }
1380 
1381     if (total_sectors1 != total_sectors2) {
1382         BlockBackend *blk_over;
1383         int64_t total_sectors_over;
1384         const char *filename_over;
1385 
1386         qprintf(quiet, "Warning: Image size mismatch!\n");
1387         if (total_sectors1 > total_sectors2) {
1388             total_sectors_over = total_sectors1;
1389             blk_over = blk1;
1390             filename_over = filename1;
1391         } else {
1392             total_sectors_over = total_sectors2;
1393             blk_over = blk2;
1394             filename_over = filename2;
1395         }
1396 
1397         for (;;) {
1398             nb_sectors = sectors_to_process(total_sectors_over, sector_num);
1399             if (nb_sectors <= 0) {
1400                 break;
1401             }
1402             ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num,
1403                                           nb_sectors, &pnum);
1404             if (ret < 0) {
1405                 ret = 3;
1406                 error_report("Sector allocation test failed for %s",
1407                              filename_over);
1408                 goto out;
1409 
1410             }
1411             nb_sectors = pnum;
1412             if (ret) {
1413                 ret = check_empty_sectors(blk_over, sector_num, nb_sectors,
1414                                           filename_over, buf1, quiet);
1415                 if (ret) {
1416                     if (ret < 0) {
1417                         error_report("Error while reading offset %" PRId64
1418                                      " of %s: %s", sectors_to_bytes(sector_num),
1419                                      filename_over, strerror(-ret));
1420                         ret = 4;
1421                     }
1422                     goto out;
1423                 }
1424             }
1425             sector_num += nb_sectors;
1426             qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1427         }
1428     }
1429 
1430     qprintf(quiet, "Images are identical.\n");
1431     ret = 0;
1432 
1433 out:
1434     qemu_vfree(buf1);
1435     qemu_vfree(buf2);
1436     blk_unref(blk2);
1437 out2:
1438     blk_unref(blk1);
1439 out3:
1440     qemu_progress_end();
1441 out4:
1442     return ret;
1443 }
1444 
/* How the conversion code treats the source range currently being processed */
enum ImgConvertBlockStatus {
    /* Contents are read from the source and written to the target */
    BLK_DATA = 0,
    /* The source range reads as zeroes */
    BLK_ZERO = 1,
    /* Unallocated in the source while the target has a backing file: the
     * range stays unallocated so the backing file remains visible */
    BLK_BACKING_FILE = 2,
};
1450 
1451 typedef struct ImgConvertState {
1452     BlockBackend **src;
1453     int64_t *src_sectors;
1454     int src_cur, src_num;
1455     int64_t src_cur_offset;
1456     int64_t total_sectors;
1457     int64_t allocated_sectors;
1458     enum ImgConvertBlockStatus status;
1459     int64_t sector_next_status;
1460     BlockBackend *target;
1461     bool has_zero_init;
1462     bool compressed;
1463     bool target_has_backing;
1464     int min_sparse;
1465     size_t cluster_sectors;
1466     size_t buf_sectors;
1467 } ImgConvertState;
1468 
1469 static void convert_select_part(ImgConvertState *s, int64_t sector_num)
1470 {
1471     assert(sector_num >= s->src_cur_offset);
1472     while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
1473         s->src_cur_offset += s->src_sectors[s->src_cur];
1474         s->src_cur++;
1475         assert(s->src_cur < s->src_num);
1476     }
1477 }
1478 
1479 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1480 {
1481     int64_t ret;
1482     int n;
1483 
1484     convert_select_part(s, sector_num);
1485 
1486     assert(s->total_sectors > sector_num);
1487     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1488 
1489     if (s->sector_next_status <= sector_num) {
1490         BlockDriverState *file;
1491         ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
1492                                     sector_num - s->src_cur_offset,
1493                                     n, &n, &file);
1494         if (ret < 0) {
1495             return ret;
1496         }
1497 
1498         if (ret & BDRV_BLOCK_ZERO) {
1499             s->status = BLK_ZERO;
1500         } else if (ret & BDRV_BLOCK_DATA) {
1501             s->status = BLK_DATA;
1502         } else if (!s->target_has_backing) {
1503             /* Without a target backing file we must copy over the contents of
1504              * the backing file as well. */
1505             /* Check block status of the backing file chain to avoid
1506              * needlessly reading zeroes and limiting the iteration to the
1507              * buffer size */
1508             ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
1509                                               sector_num - s->src_cur_offset,
1510                                               n, &n, &file);
1511             if (ret < 0) {
1512                 return ret;
1513             }
1514 
1515             if (ret & BDRV_BLOCK_ZERO) {
1516                 s->status = BLK_ZERO;
1517             } else {
1518                 s->status = BLK_DATA;
1519             }
1520         } else {
1521             s->status = BLK_BACKING_FILE;
1522         }
1523 
1524         s->sector_next_status = sector_num + n;
1525     }
1526 
1527     n = MIN(n, s->sector_next_status - sector_num);
1528     if (s->status == BLK_DATA) {
1529         n = MIN(n, s->buf_sectors);
1530     }
1531 
1532     /* We need to write complete clusters for compressed images, so if an
1533      * unallocated area is shorter than that, we must consider the whole
1534      * cluster allocated. */
1535     if (s->compressed) {
1536         if (n < s->cluster_sectors) {
1537             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1538             s->status = BLK_DATA;
1539         } else {
1540             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1541         }
1542     }
1543 
1544     return n;
1545 }
1546 
1547 static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
1548                         uint8_t *buf)
1549 {
1550     int n;
1551     int ret;
1552 
1553     assert(nb_sectors <= s->buf_sectors);
1554     while (nb_sectors > 0) {
1555         BlockBackend *blk;
1556         int64_t bs_sectors;
1557 
1558         /* In the case of compression with multiple source files, we can get a
1559          * nb_sectors that spreads into the next part. So we must be able to
1560          * read across multiple BDSes for one convert_read() call. */
1561         convert_select_part(s, sector_num);
1562         blk = s->src[s->src_cur];
1563         bs_sectors = s->src_sectors[s->src_cur];
1564 
1565         n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
1566         ret = blk_pread(blk,
1567                         (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
1568                         buf, n << BDRV_SECTOR_BITS);
1569         if (ret < 0) {
1570             return ret;
1571         }
1572 
1573         sector_num += n;
1574         nb_sectors -= n;
1575         buf += n * BDRV_SECTOR_SIZE;
1576     }
1577 
1578     return 0;
1579 }
1580 
1581 static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
1582                          const uint8_t *buf)
1583 {
1584     int ret;
1585 
1586     while (nb_sectors > 0) {
1587         int n = nb_sectors;
1588 
1589         switch (s->status) {
1590         case BLK_BACKING_FILE:
1591             /* If we have a backing file, leave clusters unallocated that are
1592              * unallocated in the source image, so that the backing file is
1593              * visible at the respective offset. */
1594             assert(s->target_has_backing);
1595             break;
1596 
1597         case BLK_DATA:
1598             /* We must always write compressed clusters as a whole, so don't
1599              * try to find zeroed parts in the buffer. We can only save the
1600              * write if the buffer is completely zeroed and we're allowed to
1601              * keep the target sparse. */
1602             if (s->compressed) {
1603                 if (s->has_zero_init && s->min_sparse &&
1604                     buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
1605                 {
1606                     assert(!s->target_has_backing);
1607                     break;
1608                 }
1609 
1610                 ret = blk_pwrite_compressed(s->target,
1611                                             sector_num << BDRV_SECTOR_BITS,
1612                                             buf, n << BDRV_SECTOR_BITS);
1613                 if (ret < 0) {
1614                     return ret;
1615                 }
1616                 break;
1617             }
1618 
1619             /* If there is real non-zero data or we're told to keep the target
1620              * fully allocated (-S 0), we must write it. Otherwise we can treat
1621              * it as zero sectors. */
1622             if (!s->min_sparse ||
1623                 is_allocated_sectors_min(buf, n, &n, s->min_sparse))
1624             {
1625                 ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1626                                  buf, n << BDRV_SECTOR_BITS, 0);
1627                 if (ret < 0) {
1628                     return ret;
1629                 }
1630                 break;
1631             }
1632             /* fall-through */
1633 
1634         case BLK_ZERO:
1635             if (s->has_zero_init) {
1636                 break;
1637             }
1638             ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
1639                                     n << BDRV_SECTOR_BITS, 0);
1640             if (ret < 0) {
1641                 return ret;
1642             }
1643             break;
1644         }
1645 
1646         sector_num += n;
1647         nb_sectors -= n;
1648         buf += n * BDRV_SECTOR_SIZE;
1649     }
1650 
1651     return 0;
1652 }
1653 
1654 static int convert_do_copy(ImgConvertState *s)
1655 {
1656     uint8_t *buf = NULL;
1657     int64_t sector_num, allocated_done;
1658     int ret;
1659     int n;
1660 
1661     /* Check whether we have zero initialisation or can get it efficiently */
1662     s->has_zero_init = s->min_sparse && !s->target_has_backing
1663                      ? bdrv_has_zero_init(blk_bs(s->target))
1664                      : false;
1665 
1666     if (!s->has_zero_init && !s->target_has_backing &&
1667         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1668     {
1669         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP);
1670         if (ret == 0) {
1671             s->has_zero_init = true;
1672         }
1673     }
1674 
1675     /* Allocate buffer for copied data. For compressed images, only one cluster
1676      * can be copied at a time. */
1677     if (s->compressed) {
1678         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
1679             error_report("invalid cluster size");
1680             ret = -EINVAL;
1681             goto fail;
1682         }
1683         s->buf_sectors = s->cluster_sectors;
1684     }
1685     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1686 
1687     /* Calculate allocated sectors for progress */
1688     s->allocated_sectors = 0;
1689     sector_num = 0;
1690     while (sector_num < s->total_sectors) {
1691         n = convert_iteration_sectors(s, sector_num);
1692         if (n < 0) {
1693             ret = n;
1694             goto fail;
1695         }
1696         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
1697         {
1698             s->allocated_sectors += n;
1699         }
1700         sector_num += n;
1701     }
1702 
1703     /* Do the copy */
1704     s->src_cur = 0;
1705     s->src_cur_offset = 0;
1706     s->sector_next_status = 0;
1707 
1708     sector_num = 0;
1709     allocated_done = 0;
1710 
1711     while (sector_num < s->total_sectors) {
1712         n = convert_iteration_sectors(s, sector_num);
1713         if (n < 0) {
1714             ret = n;
1715             goto fail;
1716         }
1717         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
1718         {
1719             allocated_done += n;
1720             qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
1721                                 0);
1722         }
1723 
1724         if (s->status == BLK_DATA) {
1725             ret = convert_read(s, sector_num, n, buf);
1726             if (ret < 0) {
1727                 error_report("error while reading sector %" PRId64
1728                              ": %s", sector_num, strerror(-ret));
1729                 goto fail;
1730             }
1731         } else if (!s->min_sparse && s->status == BLK_ZERO) {
1732             n = MIN(n, s->buf_sectors);
1733             memset(buf, 0, n * BDRV_SECTOR_SIZE);
1734             s->status = BLK_DATA;
1735         }
1736 
1737         ret = convert_write(s, sector_num, n, buf);
1738         if (ret < 0) {
1739             error_report("error while writing sector %" PRId64
1740                          ": %s", sector_num, strerror(-ret));
1741             goto fail;
1742         }
1743 
1744         sector_num += n;
1745     }
1746 
1747     if (s->compressed) {
1748         /* signal EOF to align */
1749         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
1750         if (ret < 0) {
1751             goto fail;
1752         }
1753     }
1754 
1755     ret = 0;
1756 fail:
1757     qemu_vfree(buf);
1758     return ret;
1759 }
1760 
1761 static int img_convert(int argc, char **argv)
1762 {
1763     int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
1764     int64_t ret = 0;
1765     int progress = 0, flags, src_flags;
1766     bool writethrough, src_writethrough;
1767     const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
1768     BlockDriver *drv, *proto_drv;
1769     BlockBackend **blk = NULL, *out_blk = NULL;
1770     BlockDriverState **bs = NULL, *out_bs = NULL;
1771     int64_t total_sectors;
1772     int64_t *bs_sectors = NULL;
1773     size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
1774     BlockDriverInfo bdi;
1775     QemuOpts *opts = NULL;
1776     QemuOptsList *create_opts = NULL;
1777     const char *out_baseimg_param;
1778     char *options = NULL;
1779     const char *snapshot_name = NULL;
1780     int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
1781     bool quiet = false;
1782     Error *local_err = NULL;
1783     QemuOpts *sn_opts = NULL;
1784     ImgConvertState state;
1785     bool image_opts = false;
1786 
1787     fmt = NULL;
1788     out_fmt = "raw";
1789     cache = "unsafe";
1790     src_cache = BDRV_DEFAULT_CACHE;
1791     out_baseimg = NULL;
1792     compress = 0;
1793     skip_create = 0;
1794     for(;;) {
1795         static const struct option long_options[] = {
1796             {"help", no_argument, 0, 'h'},
1797             {"object", required_argument, 0, OPTION_OBJECT},
1798             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1799             {0, 0, 0, 0}
1800         };
1801         c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
1802                         long_options, NULL);
1803         if (c == -1) {
1804             break;
1805         }
1806         switch(c) {
1807         case '?':
1808         case 'h':
1809             help();
1810             break;
1811         case 'f':
1812             fmt = optarg;
1813             break;
1814         case 'O':
1815             out_fmt = optarg;
1816             break;
1817         case 'B':
1818             out_baseimg = optarg;
1819             break;
1820         case 'c':
1821             compress = 1;
1822             break;
1823         case 'e':
1824             error_report("option -e is deprecated, please use \'-o "
1825                   "encryption\' instead!");
1826             ret = -1;
1827             goto fail_getopt;
1828         case '6':
1829             error_report("option -6 is deprecated, please use \'-o "
1830                   "compat6\' instead!");
1831             ret = -1;
1832             goto fail_getopt;
1833         case 'o':
1834             if (!is_valid_option_list(optarg)) {
1835                 error_report("Invalid option list: %s", optarg);
1836                 ret = -1;
1837                 goto fail_getopt;
1838             }
1839             if (!options) {
1840                 options = g_strdup(optarg);
1841             } else {
1842                 char *old_options = options;
1843                 options = g_strdup_printf("%s,%s", options, optarg);
1844                 g_free(old_options);
1845             }
1846             break;
1847         case 's':
1848             snapshot_name = optarg;
1849             break;
1850         case 'l':
1851             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
1852                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
1853                                                   optarg, false);
1854                 if (!sn_opts) {
1855                     error_report("Failed in parsing snapshot param '%s'",
1856                                  optarg);
1857                     ret = -1;
1858                     goto fail_getopt;
1859                 }
1860             } else {
1861                 snapshot_name = optarg;
1862             }
1863             break;
1864         case 'S':
1865         {
1866             int64_t sval;
1867             char *end;
1868             sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
1869             if (sval < 0 || *end) {
1870                 error_report("Invalid minimum zero buffer size for sparse output specified");
1871                 ret = -1;
1872                 goto fail_getopt;
1873             }
1874 
1875             min_sparse = sval / BDRV_SECTOR_SIZE;
1876             break;
1877         }
1878         case 'p':
1879             progress = 1;
1880             break;
1881         case 't':
1882             cache = optarg;
1883             break;
1884         case 'T':
1885             src_cache = optarg;
1886             break;
1887         case 'q':
1888             quiet = true;
1889             break;
1890         case 'n':
1891             skip_create = 1;
1892             break;
1893         case OPTION_OBJECT:
1894             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1895                                            optarg, true);
1896             if (!opts) {
1897                 goto fail_getopt;
1898             }
1899             break;
1900         case OPTION_IMAGE_OPTS:
1901             image_opts = true;
1902             break;
1903         }
1904     }
1905 
1906     if (qemu_opts_foreach(&qemu_object_opts,
1907                           user_creatable_add_opts_foreach,
1908                           NULL, NULL)) {
1909         goto fail_getopt;
1910     }
1911 
1912     /* Initialize before goto out */
1913     if (quiet) {
1914         progress = 0;
1915     }
1916     qemu_progress_init(progress, 1.0);
1917 
1918     bs_n = argc - optind - 1;
1919     out_filename = bs_n >= 1 ? argv[argc - 1] : NULL;
1920 
1921     if (options && has_help_option(options)) {
1922         ret = print_block_option_help(out_filename, out_fmt);
1923         goto out;
1924     }
1925 
1926     if (bs_n < 1) {
1927         error_exit("Must specify image file name");
1928     }
1929 
1930 
1931     if (bs_n > 1 && out_baseimg) {
1932         error_report("-B makes no sense when concatenating multiple input "
1933                      "images");
1934         ret = -1;
1935         goto out;
1936     }
1937 
1938     src_flags = 0;
1939     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
1940     if (ret < 0) {
1941         error_report("Invalid source cache option: %s", src_cache);
1942         goto out;
1943     }
1944 
1945     qemu_progress_print(0, 100);
1946 
1947     blk = g_new0(BlockBackend *, bs_n);
1948     bs = g_new0(BlockDriverState *, bs_n);
1949     bs_sectors = g_new(int64_t, bs_n);
1950 
1951     total_sectors = 0;
1952     for (bs_i = 0; bs_i < bs_n; bs_i++) {
1953         blk[bs_i] = img_open(image_opts, argv[optind + bs_i],
1954                              fmt, src_flags, src_writethrough, quiet);
1955         if (!blk[bs_i]) {
1956             ret = -1;
1957             goto out;
1958         }
1959         bs[bs_i] = blk_bs(blk[bs_i]);
1960         bs_sectors[bs_i] = blk_nb_sectors(blk[bs_i]);
1961         if (bs_sectors[bs_i] < 0) {
1962             error_report("Could not get size of %s: %s",
1963                          argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
1964             ret = -1;
1965             goto out;
1966         }
1967         total_sectors += bs_sectors[bs_i];
1968     }
1969 
1970     if (sn_opts) {
1971         bdrv_snapshot_load_tmp(bs[0],
1972                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
1973                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
1974                                &local_err);
1975     } else if (snapshot_name != NULL) {
1976         if (bs_n > 1) {
1977             error_report("No support for concatenating multiple snapshot");
1978             ret = -1;
1979             goto out;
1980         }
1981 
1982         bdrv_snapshot_load_tmp_by_id_or_name(bs[0], snapshot_name, &local_err);
1983     }
1984     if (local_err) {
1985         error_reportf_err(local_err, "Failed to load snapshot: ");
1986         ret = -1;
1987         goto out;
1988     }
1989 
1990     /* Find driver and parse its options */
1991     drv = bdrv_find_format(out_fmt);
1992     if (!drv) {
1993         error_report("Unknown file format '%s'", out_fmt);
1994         ret = -1;
1995         goto out;
1996     }
1997 
1998     proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
1999     if (!proto_drv) {
2000         error_report_err(local_err);
2001         ret = -1;
2002         goto out;
2003     }
2004 
2005     if (!skip_create) {
2006         if (!drv->create_opts) {
2007             error_report("Format driver '%s' does not support image creation",
2008                          drv->format_name);
2009             ret = -1;
2010             goto out;
2011         }
2012 
2013         if (!proto_drv->create_opts) {
2014             error_report("Protocol driver '%s' does not support image creation",
2015                          proto_drv->format_name);
2016             ret = -1;
2017             goto out;
2018         }
2019 
2020         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2021         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2022 
2023         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2024         if (options) {
2025             qemu_opts_do_parse(opts, options, NULL, &local_err);
2026             if (local_err) {
2027                 error_report_err(local_err);
2028                 ret = -1;
2029                 goto out;
2030             }
2031         }
2032 
2033         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_sectors * 512,
2034                             &error_abort);
2035         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2036         if (ret < 0) {
2037             goto out;
2038         }
2039     }
2040 
2041     /* Get backing file name if -o backing_file was used */
2042     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2043     if (out_baseimg_param) {
2044         out_baseimg = out_baseimg_param;
2045     }
2046 
2047     /* Check if compression is supported */
2048     if (compress) {
2049         bool encryption =
2050             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2051         const char *preallocation =
2052             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2053 
2054         if (!drv->bdrv_co_pwritev_compressed) {
2055             error_report("Compression not supported for this file format");
2056             ret = -1;
2057             goto out;
2058         }
2059 
2060         if (encryption) {
2061             error_report("Compression and encryption not supported at "
2062                          "the same time");
2063             ret = -1;
2064             goto out;
2065         }
2066 
2067         if (preallocation
2068             && strcmp(preallocation, "off"))
2069         {
2070             error_report("Compression and preallocation not supported at "
2071                          "the same time");
2072             ret = -1;
2073             goto out;
2074         }
2075     }
2076 
2077     if (!skip_create) {
2078         /* Create the new image */
2079         ret = bdrv_create(drv, out_filename, opts, &local_err);
2080         if (ret < 0) {
2081             error_reportf_err(local_err, "%s: error while converting %s: ",
2082                               out_filename, out_fmt);
2083             goto out;
2084         }
2085     }
2086 
2087     flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2088     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2089     if (ret < 0) {
2090         error_report("Invalid cache option: %s", cache);
2091         goto out;
2092     }
2093 
2094     /* XXX we should allow --image-opts to trigger use of
2095      * img_open() here, but then we have trouble with
2096      * the bdrv_create() call which takes different params.
2097      * Not critical right now, so fix can wait...
2098      */
2099     out_blk = img_open_file(out_filename, out_fmt, flags, writethrough, quiet);
2100     if (!out_blk) {
2101         ret = -1;
2102         goto out;
2103     }
2104     out_bs = blk_bs(out_blk);
2105 
2106     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2107      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
2108      * as maximum. */
2109     bufsectors = MIN(32768,
2110                      MAX(bufsectors,
2111                          MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2112                              out_bs->bl.pdiscard_alignment >>
2113                              BDRV_SECTOR_BITS)));
2114 
2115     if (skip_create) {
2116         int64_t output_sectors = blk_nb_sectors(out_blk);
2117         if (output_sectors < 0) {
2118             error_report("unable to get output image length: %s",
2119                          strerror(-output_sectors));
2120             ret = -1;
2121             goto out;
2122         } else if (output_sectors < total_sectors) {
2123             error_report("output file is smaller than input file");
2124             ret = -1;
2125             goto out;
2126         }
2127     }
2128 
2129     cluster_sectors = 0;
2130     ret = bdrv_get_info(out_bs, &bdi);
2131     if (ret < 0) {
2132         if (compress) {
2133             error_report("could not get block driver info");
2134             goto out;
2135         }
2136     } else {
2137         compress = compress || bdi.needs_compressed_writes;
2138         cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2139     }
2140 
2141     state = (ImgConvertState) {
2142         .src                = blk,
2143         .src_sectors        = bs_sectors,
2144         .src_num            = bs_n,
2145         .total_sectors      = total_sectors,
2146         .target             = out_blk,
2147         .compressed         = compress,
2148         .target_has_backing = (bool) out_baseimg,
2149         .min_sparse         = min_sparse,
2150         .cluster_sectors    = cluster_sectors,
2151         .buf_sectors        = bufsectors,
2152     };
2153     ret = convert_do_copy(&state);
2154 
2155 out:
2156     if (!ret) {
2157         qemu_progress_print(100, 0);
2158     }
2159     qemu_progress_end();
2160     qemu_opts_del(opts);
2161     qemu_opts_free(create_opts);
2162     qemu_opts_del(sn_opts);
2163     blk_unref(out_blk);
2164     g_free(bs);
2165     if (blk) {
2166         for (bs_i = 0; bs_i < bs_n; bs_i++) {
2167             blk_unref(blk[bs_i]);
2168         }
2169         g_free(blk);
2170     }
2171     g_free(bs_sectors);
2172 fail_getopt:
2173     g_free(options);
2174 
2175     if (ret) {
2176         return 1;
2177     }
2178     return 0;
2179 }
2180 
2181 
2182 static void dump_snapshots(BlockDriverState *bs)
2183 {
2184     QEMUSnapshotInfo *sn_tab, *sn;
2185     int nb_sns, i;
2186 
2187     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2188     if (nb_sns <= 0)
2189         return;
2190     printf("Snapshot list:\n");
2191     bdrv_snapshot_dump(fprintf, stdout, NULL);
2192     printf("\n");
2193     for(i = 0; i < nb_sns; i++) {
2194         sn = &sn_tab[i];
2195         bdrv_snapshot_dump(fprintf, stdout, sn);
2196         printf("\n");
2197     }
2198     g_free(sn_tab);
2199 }
2200 
2201 static void dump_json_image_info_list(ImageInfoList *list)
2202 {
2203     QString *str;
2204     QObject *obj;
2205     Visitor *v = qobject_output_visitor_new(&obj);
2206 
2207     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2208     visit_complete(v, &obj);
2209     str = qobject_to_json_pretty(obj);
2210     assert(str != NULL);
2211     printf("%s\n", qstring_get_str(str));
2212     qobject_decref(obj);
2213     visit_free(v);
2214     QDECREF(str);
2215 }
2216 
2217 static void dump_json_image_info(ImageInfo *info)
2218 {
2219     QString *str;
2220     QObject *obj;
2221     Visitor *v = qobject_output_visitor_new(&obj);
2222 
2223     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2224     visit_complete(v, &obj);
2225     str = qobject_to_json_pretty(obj);
2226     assert(str != NULL);
2227     printf("%s\n", qstring_get_str(str));
2228     qobject_decref(obj);
2229     visit_free(v);
2230     QDECREF(str);
2231 }
2232 
2233 static void dump_human_image_info_list(ImageInfoList *list)
2234 {
2235     ImageInfoList *elem;
2236     bool delim = false;
2237 
2238     for (elem = list; elem; elem = elem->next) {
2239         if (delim) {
2240             printf("\n");
2241         }
2242         delim = true;
2243 
2244         bdrv_image_info_dump(fprintf, stdout, elem->value);
2245     }
2246 }
2247 
2248 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2249 {
2250     return strcmp(a, b) == 0;
2251 }
2252 
2253 /**
2254  * Open an image file chain and return an ImageInfoList
2255  *
2256  * @filename: topmost image filename
2257  * @fmt: topmost image format (may be NULL to autodetect)
2258  * @chain: true  - enumerate entire backing file chain
2259  *         false - only topmost image file
2260  *
2261  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2262  * image file.  If there was an error a message will have been printed to
2263  * stderr.
2264  */
2265 static ImageInfoList *collect_image_info_list(bool image_opts,
2266                                               const char *filename,
2267                                               const char *fmt,
2268                                               bool chain)
2269 {
2270     ImageInfoList *head = NULL;
2271     ImageInfoList **last = &head;
2272     GHashTable *filenames;
2273     Error *err = NULL;
2274 
2275     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2276 
2277     while (filename) {
2278         BlockBackend *blk;
2279         BlockDriverState *bs;
2280         ImageInfo *info;
2281         ImageInfoList *elem;
2282 
2283         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2284             error_report("Backing file '%s' creates an infinite loop.",
2285                          filename);
2286             goto err;
2287         }
2288         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2289 
2290         blk = img_open(image_opts, filename, fmt,
2291                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false);
2292         if (!blk) {
2293             goto err;
2294         }
2295         bs = blk_bs(blk);
2296 
2297         bdrv_query_image_info(bs, &info, &err);
2298         if (err) {
2299             error_report_err(err);
2300             blk_unref(blk);
2301             goto err;
2302         }
2303 
2304         elem = g_new0(ImageInfoList, 1);
2305         elem->value = info;
2306         *last = elem;
2307         last = &elem->next;
2308 
2309         blk_unref(blk);
2310 
2311         filename = fmt = NULL;
2312         if (chain) {
2313             if (info->has_full_backing_filename) {
2314                 filename = info->full_backing_filename;
2315             } else if (info->has_backing_filename) {
2316                 error_report("Could not determine absolute backing filename,"
2317                              " but backing filename '%s' present",
2318                              info->backing_filename);
2319                 goto err;
2320             }
2321             if (info->has_backing_filename_format) {
2322                 fmt = info->backing_filename_format;
2323             }
2324         }
2325     }
2326     g_hash_table_destroy(filenames);
2327     return head;
2328 
2329 err:
2330     qapi_free_ImageInfoList(head);
2331     g_hash_table_destroy(filenames);
2332     return NULL;
2333 }
2334 
2335 static int img_info(int argc, char **argv)
2336 {
2337     int c;
2338     OutputFormat output_format = OFORMAT_HUMAN;
2339     bool chain = false;
2340     const char *filename, *fmt, *output;
2341     ImageInfoList *list;
2342     bool image_opts = false;
2343 
2344     fmt = NULL;
2345     output = NULL;
2346     for(;;) {
2347         int option_index = 0;
2348         static const struct option long_options[] = {
2349             {"help", no_argument, 0, 'h'},
2350             {"format", required_argument, 0, 'f'},
2351             {"output", required_argument, 0, OPTION_OUTPUT},
2352             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2353             {"object", required_argument, 0, OPTION_OBJECT},
2354             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2355             {0, 0, 0, 0}
2356         };
2357         c = getopt_long(argc, argv, "f:h",
2358                         long_options, &option_index);
2359         if (c == -1) {
2360             break;
2361         }
2362         switch(c) {
2363         case '?':
2364         case 'h':
2365             help();
2366             break;
2367         case 'f':
2368             fmt = optarg;
2369             break;
2370         case OPTION_OUTPUT:
2371             output = optarg;
2372             break;
2373         case OPTION_BACKING_CHAIN:
2374             chain = true;
2375             break;
2376         case OPTION_OBJECT: {
2377             QemuOpts *opts;
2378             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2379                                            optarg, true);
2380             if (!opts) {
2381                 return 1;
2382             }
2383         }   break;
2384         case OPTION_IMAGE_OPTS:
2385             image_opts = true;
2386             break;
2387         }
2388     }
2389     if (optind != argc - 1) {
2390         error_exit("Expecting one image file name");
2391     }
2392     filename = argv[optind++];
2393 
2394     if (output && !strcmp(output, "json")) {
2395         output_format = OFORMAT_JSON;
2396     } else if (output && !strcmp(output, "human")) {
2397         output_format = OFORMAT_HUMAN;
2398     } else if (output) {
2399         error_report("--output must be used with human or json as argument.");
2400         return 1;
2401     }
2402 
2403     if (qemu_opts_foreach(&qemu_object_opts,
2404                           user_creatable_add_opts_foreach,
2405                           NULL, NULL)) {
2406         return 1;
2407     }
2408 
2409     list = collect_image_info_list(image_opts, filename, fmt, chain);
2410     if (!list) {
2411         return 1;
2412     }
2413 
2414     switch (output_format) {
2415     case OFORMAT_HUMAN:
2416         dump_human_image_info_list(list);
2417         break;
2418     case OFORMAT_JSON:
2419         if (chain) {
2420             dump_json_image_info_list(list);
2421         } else {
2422             dump_json_image_info(list->value);
2423         }
2424         break;
2425     }
2426 
2427     qapi_free_ImageInfoList(list);
2428     return 0;
2429 }
2430 
2431 static void dump_map_entry(OutputFormat output_format, MapEntry *e,
2432                            MapEntry *next)
2433 {
2434     switch (output_format) {
2435     case OFORMAT_HUMAN:
2436         if (e->data && !e->has_offset) {
2437             error_report("File contains external, encrypted or compressed clusters.");
2438             exit(1);
2439         }
2440         if (e->data && !e->zero) {
2441             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2442                    e->start, e->length,
2443                    e->has_offset ? e->offset : 0,
2444                    e->has_filename ? e->filename : "");
2445         }
2446         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2447          * Modify the flags here to allow more coalescing.
2448          */
2449         if (next && (!next->data || next->zero)) {
2450             next->data = false;
2451             next->zero = true;
2452         }
2453         break;
2454     case OFORMAT_JSON:
2455         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2456                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2457                (e->start == 0 ? "[" : ",\n"),
2458                e->start, e->length, e->depth,
2459                e->zero ? "true" : "false",
2460                e->data ? "true" : "false");
2461         if (e->has_offset) {
2462             printf(", \"offset\": %"PRId64"", e->offset);
2463         }
2464         putchar('}');
2465 
2466         if (!next) {
2467             printf("]\n");
2468         }
2469         break;
2470     }
2471 }
2472 
2473 static int get_block_status(BlockDriverState *bs, int64_t sector_num,
2474                             int nb_sectors, MapEntry *e)
2475 {
2476     int64_t ret;
2477     int depth;
2478     BlockDriverState *file;
2479     bool has_offset;
2480 
2481     /* As an optimization, we could cache the current range of unallocated
2482      * clusters in each file of the chain, and avoid querying the same
2483      * range repeatedly.
2484      */
2485 
2486     depth = 0;
2487     for (;;) {
2488         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors,
2489                                     &file);
2490         if (ret < 0) {
2491             return ret;
2492         }
2493         assert(nb_sectors);
2494         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2495             break;
2496         }
2497         bs = backing_bs(bs);
2498         if (bs == NULL) {
2499             ret = 0;
2500             break;
2501         }
2502 
2503         depth++;
2504     }
2505 
2506     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2507 
2508     *e = (MapEntry) {
2509         .start = sector_num * BDRV_SECTOR_SIZE,
2510         .length = nb_sectors * BDRV_SECTOR_SIZE,
2511         .data = !!(ret & BDRV_BLOCK_DATA),
2512         .zero = !!(ret & BDRV_BLOCK_ZERO),
2513         .offset = ret & BDRV_BLOCK_OFFSET_MASK,
2514         .has_offset = has_offset,
2515         .depth = depth,
2516         .has_filename = file && has_offset,
2517         .filename = file && has_offset ? file->filename : NULL,
2518     };
2519 
2520     return 0;
2521 }
2522 
2523 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2524 {
2525     if (curr->length == 0) {
2526         return false;
2527     }
2528     if (curr->zero != next->zero ||
2529         curr->data != next->data ||
2530         curr->depth != next->depth ||
2531         curr->has_filename != next->has_filename ||
2532         curr->has_offset != next->has_offset) {
2533         return false;
2534     }
2535     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2536         return false;
2537     }
2538     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2539         return false;
2540     }
2541     return true;
2542 }
2543 
2544 static int img_map(int argc, char **argv)
2545 {
2546     int c;
2547     OutputFormat output_format = OFORMAT_HUMAN;
2548     BlockBackend *blk;
2549     BlockDriverState *bs;
2550     const char *filename, *fmt, *output;
2551     int64_t length;
2552     MapEntry curr = { .length = 0 }, next;
2553     int ret = 0;
2554     bool image_opts = false;
2555 
2556     fmt = NULL;
2557     output = NULL;
2558     for (;;) {
2559         int option_index = 0;
2560         static const struct option long_options[] = {
2561             {"help", no_argument, 0, 'h'},
2562             {"format", required_argument, 0, 'f'},
2563             {"output", required_argument, 0, OPTION_OUTPUT},
2564             {"object", required_argument, 0, OPTION_OBJECT},
2565             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2566             {0, 0, 0, 0}
2567         };
2568         c = getopt_long(argc, argv, "f:h",
2569                         long_options, &option_index);
2570         if (c == -1) {
2571             break;
2572         }
2573         switch (c) {
2574         case '?':
2575         case 'h':
2576             help();
2577             break;
2578         case 'f':
2579             fmt = optarg;
2580             break;
2581         case OPTION_OUTPUT:
2582             output = optarg;
2583             break;
2584         case OPTION_OBJECT: {
2585             QemuOpts *opts;
2586             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2587                                            optarg, true);
2588             if (!opts) {
2589                 return 1;
2590             }
2591         }   break;
2592         case OPTION_IMAGE_OPTS:
2593             image_opts = true;
2594             break;
2595         }
2596     }
2597     if (optind != argc - 1) {
2598         error_exit("Expecting one image file name");
2599     }
2600     filename = argv[optind];
2601 
2602     if (output && !strcmp(output, "json")) {
2603         output_format = OFORMAT_JSON;
2604     } else if (output && !strcmp(output, "human")) {
2605         output_format = OFORMAT_HUMAN;
2606     } else if (output) {
2607         error_report("--output must be used with human or json as argument.");
2608         return 1;
2609     }
2610 
2611     if (qemu_opts_foreach(&qemu_object_opts,
2612                           user_creatable_add_opts_foreach,
2613                           NULL, NULL)) {
2614         return 1;
2615     }
2616 
2617     blk = img_open(image_opts, filename, fmt, 0, false, false);
2618     if (!blk) {
2619         return 1;
2620     }
2621     bs = blk_bs(blk);
2622 
2623     if (output_format == OFORMAT_HUMAN) {
2624         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
2625     }
2626 
2627     length = blk_getlength(blk);
2628     while (curr.start + curr.length < length) {
2629         int64_t nsectors_left;
2630         int64_t sector_num;
2631         int n;
2632 
2633         sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
2634 
2635         /* Probe up to 1 GiB at a time.  */
2636         nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
2637         n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
2638         ret = get_block_status(bs, sector_num, n, &next);
2639 
2640         if (ret < 0) {
2641             error_report("Could not read file metadata: %s", strerror(-ret));
2642             goto out;
2643         }
2644 
2645         if (entry_mergeable(&curr, &next)) {
2646             curr.length += next.length;
2647             continue;
2648         }
2649 
2650         if (curr.length > 0) {
2651             dump_map_entry(output_format, &curr, &next);
2652         }
2653         curr = next;
2654     }
2655 
2656     dump_map_entry(output_format, &curr, NULL);
2657 
2658 out:
2659     blk_unref(blk);
2660     return ret < 0;
2661 }
2662 
/* Actions of "qemu-img snapshot"; 0 is left to mean that none was chosen */
enum {
    SNAPSHOT_LIST   = 1,
    SNAPSHOT_CREATE = 2,
    SNAPSHOT_APPLY  = 3,
    SNAPSHOT_DELETE = 4,
};
2667 
2668 static int img_snapshot(int argc, char **argv)
2669 {
2670     BlockBackend *blk;
2671     BlockDriverState *bs;
2672     QEMUSnapshotInfo sn;
2673     char *filename, *snapshot_name = NULL;
2674     int c, ret = 0, bdrv_oflags;
2675     int action = 0;
2676     qemu_timeval tv;
2677     bool quiet = false;
2678     Error *err = NULL;
2679     bool image_opts = false;
2680 
2681     bdrv_oflags = BDRV_O_RDWR;
2682     /* Parse commandline parameters */
2683     for(;;) {
2684         static const struct option long_options[] = {
2685             {"help", no_argument, 0, 'h'},
2686             {"object", required_argument, 0, OPTION_OBJECT},
2687             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2688             {0, 0, 0, 0}
2689         };
2690         c = getopt_long(argc, argv, "la:c:d:hq",
2691                         long_options, NULL);
2692         if (c == -1) {
2693             break;
2694         }
2695         switch(c) {
2696         case '?':
2697         case 'h':
2698             help();
2699             return 0;
2700         case 'l':
2701             if (action) {
2702                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2703                 return 0;
2704             }
2705             action = SNAPSHOT_LIST;
2706             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
2707             break;
2708         case 'a':
2709             if (action) {
2710                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2711                 return 0;
2712             }
2713             action = SNAPSHOT_APPLY;
2714             snapshot_name = optarg;
2715             break;
2716         case 'c':
2717             if (action) {
2718                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2719                 return 0;
2720             }
2721             action = SNAPSHOT_CREATE;
2722             snapshot_name = optarg;
2723             break;
2724         case 'd':
2725             if (action) {
2726                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2727                 return 0;
2728             }
2729             action = SNAPSHOT_DELETE;
2730             snapshot_name = optarg;
2731             break;
2732         case 'q':
2733             quiet = true;
2734             break;
2735         case OPTION_OBJECT: {
2736             QemuOpts *opts;
2737             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2738                                            optarg, true);
2739             if (!opts) {
2740                 return 1;
2741             }
2742         }   break;
2743         case OPTION_IMAGE_OPTS:
2744             image_opts = true;
2745             break;
2746         }
2747     }
2748 
2749     if (optind != argc - 1) {
2750         error_exit("Expecting one image file name");
2751     }
2752     filename = argv[optind++];
2753 
2754     if (qemu_opts_foreach(&qemu_object_opts,
2755                           user_creatable_add_opts_foreach,
2756                           NULL, NULL)) {
2757         return 1;
2758     }
2759 
2760     /* Open the image */
2761     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet);
2762     if (!blk) {
2763         return 1;
2764     }
2765     bs = blk_bs(blk);
2766 
2767     /* Perform the requested action */
2768     switch(action) {
2769     case SNAPSHOT_LIST:
2770         dump_snapshots(bs);
2771         break;
2772 
2773     case SNAPSHOT_CREATE:
2774         memset(&sn, 0, sizeof(sn));
2775         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
2776 
2777         qemu_gettimeofday(&tv);
2778         sn.date_sec = tv.tv_sec;
2779         sn.date_nsec = tv.tv_usec * 1000;
2780 
2781         ret = bdrv_snapshot_create(bs, &sn);
2782         if (ret) {
2783             error_report("Could not create snapshot '%s': %d (%s)",
2784                 snapshot_name, ret, strerror(-ret));
2785         }
2786         break;
2787 
2788     case SNAPSHOT_APPLY:
2789         ret = bdrv_snapshot_goto(bs, snapshot_name);
2790         if (ret) {
2791             error_report("Could not apply snapshot '%s': %d (%s)",
2792                 snapshot_name, ret, strerror(-ret));
2793         }
2794         break;
2795 
2796     case SNAPSHOT_DELETE:
2797         bdrv_snapshot_delete_by_id_or_name(bs, snapshot_name, &err);
2798         if (err) {
2799             error_reportf_err(err, "Could not delete snapshot '%s': ",
2800                               snapshot_name);
2801             ret = 1;
2802         }
2803         break;
2804     }
2805 
2806     /* Cleanup */
2807     blk_unref(blk);
2808     if (ret) {
2809         return 1;
2810     }
2811     return 0;
2812 }
2813 
2814 static int img_rebase(int argc, char **argv)
2815 {
2816     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
2817     uint8_t *buf_old = NULL;
2818     uint8_t *buf_new = NULL;
2819     BlockDriverState *bs = NULL;
2820     char *filename;
2821     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
2822     int c, flags, src_flags, ret;
2823     bool writethrough, src_writethrough;
2824     int unsafe = 0;
2825     int progress = 0;
2826     bool quiet = false;
2827     Error *local_err = NULL;
2828     bool image_opts = false;
2829 
2830     /* Parse commandline parameters */
2831     fmt = NULL;
2832     cache = BDRV_DEFAULT_CACHE;
2833     src_cache = BDRV_DEFAULT_CACHE;
2834     out_baseimg = NULL;
2835     out_basefmt = NULL;
2836     for(;;) {
2837         static const struct option long_options[] = {
2838             {"help", no_argument, 0, 'h'},
2839             {"object", required_argument, 0, OPTION_OBJECT},
2840             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2841             {0, 0, 0, 0}
2842         };
2843         c = getopt_long(argc, argv, "hf:F:b:upt:T:q",
2844                         long_options, NULL);
2845         if (c == -1) {
2846             break;
2847         }
2848         switch(c) {
2849         case '?':
2850         case 'h':
2851             help();
2852             return 0;
2853         case 'f':
2854             fmt = optarg;
2855             break;
2856         case 'F':
2857             out_basefmt = optarg;
2858             break;
2859         case 'b':
2860             out_baseimg = optarg;
2861             break;
2862         case 'u':
2863             unsafe = 1;
2864             break;
2865         case 'p':
2866             progress = 1;
2867             break;
2868         case 't':
2869             cache = optarg;
2870             break;
2871         case 'T':
2872             src_cache = optarg;
2873             break;
2874         case 'q':
2875             quiet = true;
2876             break;
2877         case OPTION_OBJECT: {
2878             QemuOpts *opts;
2879             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2880                                            optarg, true);
2881             if (!opts) {
2882                 return 1;
2883             }
2884         }   break;
2885         case OPTION_IMAGE_OPTS:
2886             image_opts = true;
2887             break;
2888         }
2889     }
2890 
2891     if (quiet) {
2892         progress = 0;
2893     }
2894 
2895     if (optind != argc - 1) {
2896         error_exit("Expecting one image file name");
2897     }
2898     if (!unsafe && !out_baseimg) {
2899         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
2900     }
2901     filename = argv[optind++];
2902 
2903     if (qemu_opts_foreach(&qemu_object_opts,
2904                           user_creatable_add_opts_foreach,
2905                           NULL, NULL)) {
2906         return 1;
2907     }
2908 
2909     qemu_progress_init(progress, 2.0);
2910     qemu_progress_print(0, 100);
2911 
2912     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
2913     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2914     if (ret < 0) {
2915         error_report("Invalid cache option: %s", cache);
2916         goto out;
2917     }
2918 
2919     src_flags = 0;
2920     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2921     if (ret < 0) {
2922         error_report("Invalid source cache option: %s", src_cache);
2923         goto out;
2924     }
2925 
2926     /* The source files are opened read-only, don't care about WCE */
2927     assert((src_flags & BDRV_O_RDWR) == 0);
2928     (void) src_writethrough;
2929 
2930     /*
2931      * Open the images.
2932      *
2933      * Ignore the old backing file for unsafe rebase in case we want to correct
2934      * the reference to a renamed or moved backing file.
2935      */
2936     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
2937     if (!blk) {
2938         ret = -1;
2939         goto out;
2940     }
2941     bs = blk_bs(blk);
2942 
2943     if (out_basefmt != NULL) {
2944         if (bdrv_find_format(out_basefmt) == NULL) {
2945             error_report("Invalid format name: '%s'", out_basefmt);
2946             ret = -1;
2947             goto out;
2948         }
2949     }
2950 
2951     /* For safe rebasing we need to compare old and new backing file */
2952     if (!unsafe) {
2953         char backing_name[PATH_MAX];
2954         QDict *options = NULL;
2955 
2956         if (bs->backing_format[0] != '\0') {
2957             options = qdict_new();
2958             qdict_put(options, "driver", qstring_from_str(bs->backing_format));
2959         }
2960 
2961         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
2962         blk_old_backing = blk_new_open(backing_name, NULL,
2963                                        options, src_flags, &local_err);
2964         if (!blk_old_backing) {
2965             error_reportf_err(local_err,
2966                               "Could not open old backing file '%s': ",
2967                               backing_name);
2968             ret = -1;
2969             goto out;
2970         }
2971 
2972         if (out_baseimg[0]) {
2973             if (out_basefmt) {
2974                 options = qdict_new();
2975                 qdict_put(options, "driver", qstring_from_str(out_basefmt));
2976             } else {
2977                 options = NULL;
2978             }
2979 
2980             blk_new_backing = blk_new_open(out_baseimg, NULL,
2981                                            options, src_flags, &local_err);
2982             if (!blk_new_backing) {
2983                 error_reportf_err(local_err,
2984                                   "Could not open new backing file '%s': ",
2985                                   out_baseimg);
2986                 ret = -1;
2987                 goto out;
2988             }
2989         }
2990     }
2991 
2992     /*
2993      * Check each unallocated cluster in the COW file. If it is unallocated,
2994      * accesses go to the backing file. We must therefore compare this cluster
2995      * in the old and new backing file, and if they differ we need to copy it
2996      * from the old backing file into the COW file.
2997      *
2998      * If qemu-img crashes during this step, no harm is done. The content of
2999      * the image is the same as the original one at any time.
3000      */
3001     if (!unsafe) {
3002         int64_t num_sectors;
3003         int64_t old_backing_num_sectors;
3004         int64_t new_backing_num_sectors = 0;
3005         uint64_t sector;
3006         int n;
3007         float local_progress = 0;
3008 
3009         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3010         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3011 
3012         num_sectors = blk_nb_sectors(blk);
3013         if (num_sectors < 0) {
3014             error_report("Could not get size of '%s': %s",
3015                          filename, strerror(-num_sectors));
3016             ret = -1;
3017             goto out;
3018         }
3019         old_backing_num_sectors = blk_nb_sectors(blk_old_backing);
3020         if (old_backing_num_sectors < 0) {
3021             char backing_name[PATH_MAX];
3022 
3023             bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
3024             error_report("Could not get size of '%s': %s",
3025                          backing_name, strerror(-old_backing_num_sectors));
3026             ret = -1;
3027             goto out;
3028         }
3029         if (blk_new_backing) {
3030             new_backing_num_sectors = blk_nb_sectors(blk_new_backing);
3031             if (new_backing_num_sectors < 0) {
3032                 error_report("Could not get size of '%s': %s",
3033                              out_baseimg, strerror(-new_backing_num_sectors));
3034                 ret = -1;
3035                 goto out;
3036             }
3037         }
3038 
3039         if (num_sectors != 0) {
3040             local_progress = (float)100 /
3041                 (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
3042         }
3043 
3044         for (sector = 0; sector < num_sectors; sector += n) {
3045 
3046             /* How many sectors can we handle with the next read? */
3047             if (sector + (IO_BUF_SIZE / 512) <= num_sectors) {
3048                 n = (IO_BUF_SIZE / 512);
3049             } else {
3050                 n = num_sectors - sector;
3051             }
3052 
3053             /* If the cluster is allocated, we don't need to take action */
3054             ret = bdrv_is_allocated(bs, sector, n, &n);
3055             if (ret < 0) {
3056                 error_report("error while reading image metadata: %s",
3057                              strerror(-ret));
3058                 goto out;
3059             }
3060             if (ret) {
3061                 continue;
3062             }
3063 
3064             /*
3065              * Read old and new backing file and take into consideration that
3066              * backing files may be smaller than the COW image.
3067              */
3068             if (sector >= old_backing_num_sectors) {
3069                 memset(buf_old, 0, n * BDRV_SECTOR_SIZE);
3070             } else {
3071                 if (sector + n > old_backing_num_sectors) {
3072                     n = old_backing_num_sectors - sector;
3073                 }
3074 
3075                 ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS,
3076                                 buf_old, n << BDRV_SECTOR_BITS);
3077                 if (ret < 0) {
3078                     error_report("error while reading from old backing file");
3079                     goto out;
3080                 }
3081             }
3082 
3083             if (sector >= new_backing_num_sectors || !blk_new_backing) {
3084                 memset(buf_new, 0, n * BDRV_SECTOR_SIZE);
3085             } else {
3086                 if (sector + n > new_backing_num_sectors) {
3087                     n = new_backing_num_sectors - sector;
3088                 }
3089 
3090                 ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS,
3091                                 buf_new, n << BDRV_SECTOR_BITS);
3092                 if (ret < 0) {
3093                     error_report("error while reading from new backing file");
3094                     goto out;
3095                 }
3096             }
3097 
3098             /* If they differ, we need to write to the COW file */
3099             uint64_t written = 0;
3100 
3101             while (written < n) {
3102                 int pnum;
3103 
3104                 if (compare_sectors(buf_old + written * 512,
3105                     buf_new + written * 512, n - written, &pnum))
3106                 {
3107                     ret = blk_pwrite(blk,
3108                                      (sector + written) << BDRV_SECTOR_BITS,
3109                                      buf_old + written * 512,
3110                                      pnum << BDRV_SECTOR_BITS, 0);
3111                     if (ret < 0) {
3112                         error_report("Error while writing to COW image: %s",
3113                             strerror(-ret));
3114                         goto out;
3115                     }
3116                 }
3117 
3118                 written += pnum;
3119             }
3120             qemu_progress_print(local_progress, 100);
3121         }
3122     }
3123 
3124     /*
3125      * Change the backing file. All clusters that are different from the old
3126      * backing file are overwritten in the COW file now, so the visible content
3127      * doesn't change when we switch the backing file.
3128      */
3129     if (out_baseimg && *out_baseimg) {
3130         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3131     } else {
3132         ret = bdrv_change_backing_file(bs, NULL, NULL);
3133     }
3134 
3135     if (ret == -ENOSPC) {
3136         error_report("Could not change the backing file to '%s': No "
3137                      "space left in the file header", out_baseimg);
3138     } else if (ret < 0) {
3139         error_report("Could not change the backing file to '%s': %s",
3140             out_baseimg, strerror(-ret));
3141     }
3142 
3143     qemu_progress_print(100, 0);
3144     /*
3145      * TODO At this point it is possible to check if any clusters that are
3146      * allocated in the COW file are the same in the backing file. If so, they
3147      * could be dropped from the COW file. Don't do this before switching the
3148      * backing file, in case of a crash this would lead to corruption.
3149      */
3150 out:
3151     qemu_progress_end();
3152     /* Cleanup */
3153     if (!unsafe) {
3154         blk_unref(blk_old_backing);
3155         blk_unref(blk_new_backing);
3156     }
3157     qemu_vfree(buf_old);
3158     qemu_vfree(buf_new);
3159 
3160     blk_unref(blk);
3161     if (ret) {
3162         return 1;
3163     }
3164     return 0;
3165 }
3166 
3167 static int img_resize(int argc, char **argv)
3168 {
3169     Error *err = NULL;
3170     int c, ret, relative;
3171     const char *filename, *fmt, *size;
3172     int64_t n, total_size;
3173     bool quiet = false;
3174     BlockBackend *blk = NULL;
3175     QemuOpts *param;
3176 
3177     static QemuOptsList resize_options = {
3178         .name = "resize_options",
3179         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3180         .desc = {
3181             {
3182                 .name = BLOCK_OPT_SIZE,
3183                 .type = QEMU_OPT_SIZE,
3184                 .help = "Virtual disk size"
3185             }, {
3186                 /* end of list */
3187             }
3188         },
3189     };
3190     bool image_opts = false;
3191 
3192     /* Remove size from argv manually so that negative numbers are not treated
3193      * as options by getopt. */
3194     if (argc < 3) {
3195         error_exit("Not enough arguments");
3196         return 1;
3197     }
3198 
3199     size = argv[--argc];
3200 
3201     /* Parse getopt arguments */
3202     fmt = NULL;
3203     for(;;) {
3204         static const struct option long_options[] = {
3205             {"help", no_argument, 0, 'h'},
3206             {"object", required_argument, 0, OPTION_OBJECT},
3207             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3208             {0, 0, 0, 0}
3209         };
3210         c = getopt_long(argc, argv, "f:hq",
3211                         long_options, NULL);
3212         if (c == -1) {
3213             break;
3214         }
3215         switch(c) {
3216         case '?':
3217         case 'h':
3218             help();
3219             break;
3220         case 'f':
3221             fmt = optarg;
3222             break;
3223         case 'q':
3224             quiet = true;
3225             break;
3226         case OPTION_OBJECT: {
3227             QemuOpts *opts;
3228             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3229                                            optarg, true);
3230             if (!opts) {
3231                 return 1;
3232             }
3233         }   break;
3234         case OPTION_IMAGE_OPTS:
3235             image_opts = true;
3236             break;
3237         }
3238     }
3239     if (optind != argc - 1) {
3240         error_exit("Expecting one image file name");
3241     }
3242     filename = argv[optind++];
3243 
3244     if (qemu_opts_foreach(&qemu_object_opts,
3245                           user_creatable_add_opts_foreach,
3246                           NULL, NULL)) {
3247         return 1;
3248     }
3249 
3250     /* Choose grow, shrink, or absolute resize mode */
3251     switch (size[0]) {
3252     case '+':
3253         relative = 1;
3254         size++;
3255         break;
3256     case '-':
3257         relative = -1;
3258         size++;
3259         break;
3260     default:
3261         relative = 0;
3262         break;
3263     }
3264 
3265     /* Parse size */
3266     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3267     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3268     if (err) {
3269         error_report_err(err);
3270         ret = -1;
3271         qemu_opts_del(param);
3272         goto out;
3273     }
3274     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3275     qemu_opts_del(param);
3276 
3277     blk = img_open(image_opts, filename, fmt,
3278                    BDRV_O_RDWR, false, quiet);
3279     if (!blk) {
3280         ret = -1;
3281         goto out;
3282     }
3283 
3284     if (relative) {
3285         total_size = blk_getlength(blk) + n * relative;
3286     } else {
3287         total_size = n;
3288     }
3289     if (total_size <= 0) {
3290         error_report("New image size must be positive");
3291         ret = -1;
3292         goto out;
3293     }
3294 
3295     ret = blk_truncate(blk, total_size);
3296     switch (ret) {
3297     case 0:
3298         qprintf(quiet, "Image resized.\n");
3299         break;
3300     case -ENOTSUP:
3301         error_report("This image does not support resize");
3302         break;
3303     case -EACCES:
3304         error_report("Image is read-only");
3305         break;
3306     default:
3307         error_report("Error resizing image: %s", strerror(-ret));
3308         break;
3309     }
3310 out:
3311     blk_unref(blk);
3312     if (ret) {
3313         return 1;
3314     }
3315     return 0;
3316 }
3317 
3318 static void amend_status_cb(BlockDriverState *bs,
3319                             int64_t offset, int64_t total_work_size,
3320                             void *opaque)
3321 {
3322     qemu_progress_print(100.f * offset / total_work_size, 0);
3323 }
3324 
3325 static int img_amend(int argc, char **argv)
3326 {
3327     Error *err = NULL;
3328     int c, ret = 0;
3329     char *options = NULL;
3330     QemuOptsList *create_opts = NULL;
3331     QemuOpts *opts = NULL;
3332     const char *fmt = NULL, *filename, *cache;
3333     int flags;
3334     bool writethrough;
3335     bool quiet = false, progress = false;
3336     BlockBackend *blk = NULL;
3337     BlockDriverState *bs = NULL;
3338     bool image_opts = false;
3339 
3340     cache = BDRV_DEFAULT_CACHE;
3341     for (;;) {
3342         static const struct option long_options[] = {
3343             {"help", no_argument, 0, 'h'},
3344             {"object", required_argument, 0, OPTION_OBJECT},
3345             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3346             {0, 0, 0, 0}
3347         };
3348         c = getopt_long(argc, argv, "ho:f:t:pq",
3349                         long_options, NULL);
3350         if (c == -1) {
3351             break;
3352         }
3353 
3354         switch (c) {
3355             case 'h':
3356             case '?':
3357                 help();
3358                 break;
3359             case 'o':
3360                 if (!is_valid_option_list(optarg)) {
3361                     error_report("Invalid option list: %s", optarg);
3362                     ret = -1;
3363                     goto out_no_progress;
3364                 }
3365                 if (!options) {
3366                     options = g_strdup(optarg);
3367                 } else {
3368                     char *old_options = options;
3369                     options = g_strdup_printf("%s,%s", options, optarg);
3370                     g_free(old_options);
3371                 }
3372                 break;
3373             case 'f':
3374                 fmt = optarg;
3375                 break;
3376             case 't':
3377                 cache = optarg;
3378                 break;
3379             case 'p':
3380                 progress = true;
3381                 break;
3382             case 'q':
3383                 quiet = true;
3384                 break;
3385             case OPTION_OBJECT:
3386                 opts = qemu_opts_parse_noisily(&qemu_object_opts,
3387                                                optarg, true);
3388                 if (!opts) {
3389                     ret = -1;
3390                     goto out_no_progress;
3391                 }
3392                 break;
3393             case OPTION_IMAGE_OPTS:
3394                 image_opts = true;
3395                 break;
3396         }
3397     }
3398 
3399     if (!options) {
3400         error_exit("Must specify options (-o)");
3401     }
3402 
3403     if (qemu_opts_foreach(&qemu_object_opts,
3404                           user_creatable_add_opts_foreach,
3405                           NULL, NULL)) {
3406         ret = -1;
3407         goto out_no_progress;
3408     }
3409 
3410     if (quiet) {
3411         progress = false;
3412     }
3413     qemu_progress_init(progress, 1.0);
3414 
3415     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
3416     if (fmt && has_help_option(options)) {
3417         /* If a format is explicitly specified (and possibly no filename is
3418          * given), print option help here */
3419         ret = print_block_option_help(filename, fmt);
3420         goto out;
3421     }
3422 
3423     if (optind != argc - 1) {
3424         error_report("Expecting one image file name");
3425         ret = -1;
3426         goto out;
3427     }
3428 
3429     flags = BDRV_O_RDWR;
3430     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3431     if (ret < 0) {
3432         error_report("Invalid cache option: %s", cache);
3433         goto out;
3434     }
3435 
3436     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3437     if (!blk) {
3438         ret = -1;
3439         goto out;
3440     }
3441     bs = blk_bs(blk);
3442 
3443     fmt = bs->drv->format_name;
3444 
3445     if (has_help_option(options)) {
3446         /* If the format was auto-detected, print option help here */
3447         ret = print_block_option_help(filename, fmt);
3448         goto out;
3449     }
3450 
3451     if (!bs->drv->create_opts) {
3452         error_report("Format driver '%s' does not support any options to amend",
3453                      fmt);
3454         ret = -1;
3455         goto out;
3456     }
3457 
3458     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
3459     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3460     qemu_opts_do_parse(opts, options, NULL, &err);
3461     if (err) {
3462         error_report_err(err);
3463         ret = -1;
3464         goto out;
3465     }
3466 
3467     /* In case the driver does not call amend_status_cb() */
3468     qemu_progress_print(0.f, 0);
3469     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL);
3470     qemu_progress_print(100.f, 0);
3471     if (ret < 0) {
3472         error_report("Error while amending options: %s", strerror(-ret));
3473         goto out;
3474     }
3475 
3476 out:
3477     qemu_progress_end();
3478 
3479 out_no_progress:
3480     blk_unref(blk);
3481     qemu_opts_del(opts);
3482     qemu_opts_free(create_opts);
3483     g_free(options);
3484 
3485     if (ret) {
3486         return 1;
3487     }
3488     return 0;
3489 }
3490 
3491 typedef struct BenchData {
3492     BlockBackend *blk;
3493     uint64_t image_size;
3494     bool write;
3495     int bufsize;
3496     int step;
3497     int nrreq;
3498     int n;
3499     int flush_interval;
3500     bool drain_on_flush;
3501     uint8_t *buf;
3502     QEMUIOVector *qiov;
3503 
3504     int in_flight;
3505     bool in_flush;
3506     uint64_t offset;
3507 } BenchData;
3508 
/*
 * Completion callback for flushes issued without draining the request
 * queue: a successful flush needs no follow-up, a failed one terminates the
 * program. opaque is not used.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
3516 
3517 static void bench_cb(void *opaque, int ret)
3518 {
3519     BenchData *b = opaque;
3520     BlockAIOCB *acb;
3521 
3522     if (ret < 0) {
3523         error_report("Failed request: %s", strerror(-ret));
3524         exit(EXIT_FAILURE);
3525     }
3526 
3527     if (b->in_flush) {
3528         /* Just finished a flush with drained queue: Start next requests */
3529         assert(b->in_flight == 0);
3530         b->in_flush = false;
3531     } else if (b->in_flight > 0) {
3532         int remaining = b->n - b->in_flight;
3533 
3534         b->n--;
3535         b->in_flight--;
3536 
3537         /* Time for flush? Drain queue if requested, then flush */
3538         if (b->flush_interval && remaining % b->flush_interval == 0) {
3539             if (!b->in_flight || !b->drain_on_flush) {
3540                 BlockCompletionFunc *cb;
3541 
3542                 if (b->drain_on_flush) {
3543                     b->in_flush = true;
3544                     cb = bench_cb;
3545                 } else {
3546                     cb = bench_undrained_flush_cb;
3547                 }
3548 
3549                 acb = blk_aio_flush(b->blk, cb, b);
3550                 if (!acb) {
3551                     error_report("Failed to issue flush request");
3552                     exit(EXIT_FAILURE);
3553                 }
3554             }
3555             if (b->drain_on_flush) {
3556                 return;
3557             }
3558         }
3559     }
3560 
3561     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
3562         int64_t offset = b->offset;
3563         /* blk_aio_* might look for completed I/Os and kick bench_cb
3564          * again, so make sure this operation is counted by in_flight
3565          * and b->offset is ready for the next submission.
3566          */
3567         b->in_flight++;
3568         b->offset += b->step;
3569         b->offset %= b->image_size;
3570         if (b->write) {
3571             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
3572         } else {
3573             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
3574         }
3575         if (!acb) {
3576             error_report("Failed to issue request");
3577             exit(EXIT_FAILURE);
3578         }
3579     }
3580 }
3581 
3582 static int img_bench(int argc, char **argv)
3583 {
3584     int c, ret = 0;
3585     const char *fmt = NULL, *filename;
3586     bool quiet = false;
3587     bool image_opts = false;
3588     bool is_write = false;
3589     int count = 75000;
3590     int depth = 64;
3591     int64_t offset = 0;
3592     size_t bufsize = 4096;
3593     int pattern = 0;
3594     size_t step = 0;
3595     int flush_interval = 0;
3596     bool drain_on_flush = true;
3597     int64_t image_size;
3598     BlockBackend *blk = NULL;
3599     BenchData data = {};
3600     int flags = 0;
3601     bool writethrough = false;
3602     struct timeval t1, t2;
3603     int i;
3604 
3605     for (;;) {
3606         static const struct option long_options[] = {
3607             {"help", no_argument, 0, 'h'},
3608             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
3609             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3610             {"pattern", required_argument, 0, OPTION_PATTERN},
3611             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
3612             {0, 0, 0, 0}
3613         };
3614         c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL);
3615         if (c == -1) {
3616             break;
3617         }
3618 
3619         switch (c) {
3620         case 'h':
3621         case '?':
3622             help();
3623             break;
3624         case 'c':
3625         {
3626             unsigned long res;
3627 
3628             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3629                 error_report("Invalid request count specified");
3630                 return 1;
3631             }
3632             count = res;
3633             break;
3634         }
3635         case 'd':
3636         {
3637             unsigned long res;
3638 
3639             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3640                 error_report("Invalid queue depth specified");
3641                 return 1;
3642             }
3643             depth = res;
3644             break;
3645         }
3646         case 'f':
3647             fmt = optarg;
3648             break;
3649         case 'n':
3650             flags |= BDRV_O_NATIVE_AIO;
3651             break;
3652         case 'o':
3653         {
3654             char *end;
3655             errno = 0;
3656             offset = qemu_strtosz_suffix(optarg, &end,
3657                                          QEMU_STRTOSZ_DEFSUFFIX_B);
3658             if (offset < 0|| *end) {
3659                 error_report("Invalid offset specified");
3660                 return 1;
3661             }
3662             break;
3663         }
3664             break;
3665         case 'q':
3666             quiet = true;
3667             break;
3668         case 's':
3669         {
3670             int64_t sval;
3671             char *end;
3672 
3673             sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3674             if (sval < 0 || sval > INT_MAX || *end) {
3675                 error_report("Invalid buffer size specified");
3676                 return 1;
3677             }
3678 
3679             bufsize = sval;
3680             break;
3681         }
3682         case 'S':
3683         {
3684             int64_t sval;
3685             char *end;
3686 
3687             sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3688             if (sval < 0 || sval > INT_MAX || *end) {
3689                 error_report("Invalid step size specified");
3690                 return 1;
3691             }
3692 
3693             step = sval;
3694             break;
3695         }
3696         case 't':
3697             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
3698             if (ret < 0) {
3699                 error_report("Invalid cache mode");
3700                 ret = -1;
3701                 goto out;
3702             }
3703             break;
3704         case 'w':
3705             flags |= BDRV_O_RDWR;
3706             is_write = true;
3707             break;
3708         case OPTION_PATTERN:
3709         {
3710             unsigned long res;
3711 
3712             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
3713                 error_report("Invalid pattern byte specified");
3714                 return 1;
3715             }
3716             pattern = res;
3717             break;
3718         }
3719         case OPTION_FLUSH_INTERVAL:
3720         {
3721             unsigned long res;
3722 
3723             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3724                 error_report("Invalid flush interval specified");
3725                 return 1;
3726             }
3727             flush_interval = res;
3728             break;
3729         }
3730         case OPTION_NO_DRAIN:
3731             drain_on_flush = false;
3732             break;
3733         case OPTION_IMAGE_OPTS:
3734             image_opts = true;
3735             break;
3736         }
3737     }
3738 
3739     if (optind != argc - 1) {
3740         error_exit("Expecting one image file name");
3741     }
3742     filename = argv[argc - 1];
3743 
3744     if (!is_write && flush_interval) {
3745         error_report("--flush-interval is only available in write tests");
3746         ret = -1;
3747         goto out;
3748     }
3749     if (flush_interval && flush_interval < depth) {
3750         error_report("Flush interval can't be smaller than depth");
3751         ret = -1;
3752         goto out;
3753     }
3754 
3755     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3756     if (!blk) {
3757         ret = -1;
3758         goto out;
3759     }
3760 
3761     image_size = blk_getlength(blk);
3762     if (image_size < 0) {
3763         ret = image_size;
3764         goto out;
3765     }
3766 
3767     data = (BenchData) {
3768         .blk            = blk,
3769         .image_size     = image_size,
3770         .bufsize        = bufsize,
3771         .step           = step ?: bufsize,
3772         .nrreq          = depth,
3773         .n              = count,
3774         .offset         = offset,
3775         .write          = is_write,
3776         .flush_interval = flush_interval,
3777         .drain_on_flush = drain_on_flush,
3778     };
3779     printf("Sending %d %s requests, %d bytes each, %d in parallel "
3780            "(starting at offset %" PRId64 ", step size %d)\n",
3781            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
3782            data.offset, data.step);
3783     if (flush_interval) {
3784         printf("Sending flush every %d requests\n", flush_interval);
3785     }
3786 
3787     data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
3788     memset(data.buf, pattern, data.nrreq * data.bufsize);
3789 
3790     data.qiov = g_new(QEMUIOVector, data.nrreq);
3791     for (i = 0; i < data.nrreq; i++) {
3792         qemu_iovec_init(&data.qiov[i], 1);
3793         qemu_iovec_add(&data.qiov[i],
3794                        data.buf + i * data.bufsize, data.bufsize);
3795     }
3796 
3797     gettimeofday(&t1, NULL);
3798     bench_cb(&data, 0);
3799 
3800     while (data.n > 0) {
3801         main_loop_wait(false);
3802     }
3803     gettimeofday(&t2, NULL);
3804 
3805     printf("Run completed in %3.3f seconds.\n",
3806            (t2.tv_sec - t1.tv_sec)
3807            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
3808 
3809 out:
3810     qemu_vfree(data.buf);
3811     blk_unref(blk);
3812 
3813     if (ret) {
3814         return 1;
3815     }
3816     return 0;
3817 }
3818 
/*
 * Bits recorded in DdInfo.flags, one per 'qemu-img dd' operand, so that
 * img_dd() can tell which operands were present on the command line.
 */
#define C_BS      0x01  /* bs=    */
#define C_COUNT   0x02  /* count= */
#define C_IF      0x04  /* if=    */
#define C_OF      0x08  /* of=    */
#define C_SKIP    0x10  /* skip=  */
3824 
/* Global state of one 'qemu-img dd' invocation, filled in while parsing. */
struct DdInfo {
    unsigned int flags; /* OR of the C_* bits of every operand parsed so far */
    int64_t count;      /* value of count=; img_dd() multiplies it by bsz */
};
3829 
/* Per-side (input or output) parameters of a 'qemu-img dd' run. */
struct DdIo {
    int bsz;    /* Block size */
    char *filename; /* g_strdup()ed by the if=/of= handlers, freed by img_dd() */
    uint8_t *buf;   /* transfer buffer; img_dd() allocates bsz bytes for input */
    int64_t offset; /* for the input: skip= value, counted in blocks of bsz */
};
3836 
/*
 * One entry of the operand table used by img_dd(): maps an operand name
 * ("bs", "count", ...) to its parser and to the flag recording its use.
 */
struct DdOpts {
    const char *name;   /* text before the '=' in "name=value" */
    /* parses the value; img_dd() treats a non-zero return as failure */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* bit OR'ed into DdInfo.flags once f succeeded */
};
3842 
3843 static int img_dd_bs(const char *arg,
3844                      struct DdIo *in, struct DdIo *out,
3845                      struct DdInfo *dd)
3846 {
3847     char *end;
3848     int64_t res;
3849 
3850     res = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3851 
3852     if (res <= 0 || res > INT_MAX || *end) {
3853         error_report("invalid number: '%s'", arg);
3854         return 1;
3855     }
3856     in->bsz = out->bsz = res;
3857 
3858     return 0;
3859 }
3860 
3861 static int img_dd_count(const char *arg,
3862                         struct DdIo *in, struct DdIo *out,
3863                         struct DdInfo *dd)
3864 {
3865     char *end;
3866 
3867     dd->count = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3868 
3869     if (dd->count < 0 || *end) {
3870         error_report("invalid number: '%s'", arg);
3871         return 1;
3872     }
3873 
3874     return 0;
3875 }
3876 
3877 static int img_dd_if(const char *arg,
3878                      struct DdIo *in, struct DdIo *out,
3879                      struct DdInfo *dd)
3880 {
3881     in->filename = g_strdup(arg);
3882 
3883     return 0;
3884 }
3885 
3886 static int img_dd_of(const char *arg,
3887                      struct DdIo *in, struct DdIo *out,
3888                      struct DdInfo *dd)
3889 {
3890     out->filename = g_strdup(arg);
3891 
3892     return 0;
3893 }
3894 
3895 static int img_dd_skip(const char *arg,
3896                        struct DdIo *in, struct DdIo *out,
3897                        struct DdInfo *dd)
3898 {
3899     char *end;
3900 
3901     in->offset = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3902 
3903     if (in->offset < 0 || *end) {
3904         error_report("invalid number: '%s'", arg);
3905         return 1;
3906     }
3907 
3908     return 0;
3909 }
3910 
3911 static int img_dd(int argc, char **argv)
3912 {
3913     int ret = 0;
3914     char *arg = NULL;
3915     char *tmp;
3916     BlockDriver *drv = NULL, *proto_drv = NULL;
3917     BlockBackend *blk1 = NULL, *blk2 = NULL;
3918     QemuOpts *opts = NULL;
3919     QemuOptsList *create_opts = NULL;
3920     Error *local_err = NULL;
3921     bool image_opts = false;
3922     int c, i;
3923     const char *out_fmt = "raw";
3924     const char *fmt = NULL;
3925     int64_t size = 0;
3926     int64_t block_count = 0, out_pos, in_pos;
3927     struct DdInfo dd = {
3928         .flags = 0,
3929         .count = 0,
3930     };
3931     struct DdIo in = {
3932         .bsz = 512, /* Block size is by default 512 bytes */
3933         .filename = NULL,
3934         .buf = NULL,
3935         .offset = 0
3936     };
3937     struct DdIo out = {
3938         .bsz = 512,
3939         .filename = NULL,
3940         .buf = NULL,
3941         .offset = 0
3942     };
3943 
3944     const struct DdOpts options[] = {
3945         { "bs", img_dd_bs, C_BS },
3946         { "count", img_dd_count, C_COUNT },
3947         { "if", img_dd_if, C_IF },
3948         { "of", img_dd_of, C_OF },
3949         { "skip", img_dd_skip, C_SKIP },
3950         { NULL, NULL, 0 }
3951     };
3952     const struct option long_options[] = {
3953         { "help", no_argument, 0, 'h'},
3954         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3955         { 0, 0, 0, 0 }
3956     };
3957 
3958     while ((c = getopt_long(argc, argv, "hf:O:", long_options, NULL))) {
3959         if (c == EOF) {
3960             break;
3961         }
3962         switch (c) {
3963         case 'O':
3964             out_fmt = optarg;
3965             break;
3966         case 'f':
3967             fmt = optarg;
3968             break;
3969         case '?':
3970             error_report("Try 'qemu-img --help' for more information.");
3971             ret = -1;
3972             goto out;
3973         case 'h':
3974             help();
3975             break;
3976         case OPTION_IMAGE_OPTS:
3977             image_opts = true;
3978             break;
3979         }
3980     }
3981 
3982     for (i = optind; i < argc; i++) {
3983         int j;
3984         arg = g_strdup(argv[i]);
3985 
3986         tmp = strchr(arg, '=');
3987         if (tmp == NULL) {
3988             error_report("unrecognized operand %s", arg);
3989             ret = -1;
3990             goto out;
3991         }
3992 
3993         *tmp++ = '\0';
3994 
3995         for (j = 0; options[j].name != NULL; j++) {
3996             if (!strcmp(arg, options[j].name)) {
3997                 break;
3998             }
3999         }
4000         if (options[j].name == NULL) {
4001             error_report("unrecognized operand %s", arg);
4002             ret = -1;
4003             goto out;
4004         }
4005 
4006         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4007             ret = -1;
4008             goto out;
4009         }
4010         dd.flags |= options[j].flag;
4011         g_free(arg);
4012         arg = NULL;
4013     }
4014 
4015     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4016         error_report("Must specify both input and output files");
4017         ret = -1;
4018         goto out;
4019     }
4020     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false);
4021 
4022     if (!blk1) {
4023         ret = -1;
4024         goto out;
4025     }
4026 
4027     drv = bdrv_find_format(out_fmt);
4028     if (!drv) {
4029         error_report("Unknown file format");
4030         ret = -1;
4031         goto out;
4032     }
4033     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4034 
4035     if (!proto_drv) {
4036         error_report_err(local_err);
4037         ret = -1;
4038         goto out;
4039     }
4040     if (!drv->create_opts) {
4041         error_report("Format driver '%s' does not support image creation",
4042                      drv->format_name);
4043         ret = -1;
4044         goto out;
4045     }
4046     if (!proto_drv->create_opts) {
4047         error_report("Protocol driver '%s' does not support image creation",
4048                      proto_drv->format_name);
4049         ret = -1;
4050         goto out;
4051     }
4052     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4053     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4054 
4055     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4056 
4057     size = blk_getlength(blk1);
4058     if (size < 0) {
4059         error_report("Failed to get size for '%s'", in.filename);
4060         ret = -1;
4061         goto out;
4062     }
4063 
4064     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4065         dd.count * in.bsz < size) {
4066         size = dd.count * in.bsz;
4067     }
4068 
4069     /* Overflow means the specified offset is beyond input image's size */
4070     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4071                               size < in.bsz * in.offset)) {
4072         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4073     } else {
4074         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4075                             size - in.bsz * in.offset, &error_abort);
4076     }
4077 
4078     ret = bdrv_create(drv, out.filename, opts, &local_err);
4079     if (ret < 0) {
4080         error_reportf_err(local_err,
4081                           "%s: error while creating output image: ",
4082                           out.filename);
4083         ret = -1;
4084         goto out;
4085     }
4086 
4087     blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR,
4088                     false, false);
4089 
4090     if (!blk2) {
4091         ret = -1;
4092         goto out;
4093     }
4094 
4095     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4096                               size < in.offset * in.bsz)) {
4097         /* We give a warning if the skip option is bigger than the input
4098          * size and create an empty output disk image (i.e. like dd(1)).
4099          */
4100         error_report("%s: cannot skip to specified offset", in.filename);
4101         in_pos = size;
4102     } else {
4103         in_pos = in.offset * in.bsz;
4104     }
4105 
4106     in.buf = g_new(uint8_t, in.bsz);
4107 
4108     for (out_pos = 0; in_pos < size; block_count++) {
4109         int in_ret, out_ret;
4110 
4111         if (in_pos + in.bsz > size) {
4112             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4113         } else {
4114             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4115         }
4116         if (in_ret < 0) {
4117             error_report("error while reading from input image file: %s",
4118                          strerror(-in_ret));
4119             ret = -1;
4120             goto out;
4121         }
4122         in_pos += in_ret;
4123 
4124         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4125 
4126         if (out_ret < 0) {
4127             error_report("error while writing to output image file: %s",
4128                          strerror(-out_ret));
4129             ret = -1;
4130             goto out;
4131         }
4132         out_pos += out_ret;
4133     }
4134 
4135 out:
4136     g_free(arg);
4137     qemu_opts_del(opts);
4138     qemu_opts_free(create_opts);
4139     blk_unref(blk1);
4140     blk_unref(blk2);
4141     g_free(in.filename);
4142     g_free(out.filename);
4143     g_free(in.buf);
4144     g_free(out.buf);
4145 
4146     if (ret) {
4147         return 1;
4148     }
4149     return 0;
4150 }
4151 
4152 
/*
 * Table of qemu-img subcommands, searched by main() by comparing each
 * entry's name with the command given on the command line.  The entries
 * come from including qemu-img-cmds.h with DEF() defined to emit an
 * { option, callback } initializer (the header is expected to consist of
 * DEF(...) invocations); the arg_string argument of DEF() is not used
 * here.  The final all-NULL entry terminates the table.
 */
static const img_cmd_t img_cmds[] = {
#define DEF(option, callback, arg_string)        \
    { option, callback },
#include "qemu-img-cmds.h"
#undef DEF
#undef GEN_DOCS
    { NULL, NULL, },
};
4161 
4162 int main(int argc, char **argv)
4163 {
4164     const img_cmd_t *cmd;
4165     const char *cmdname;
4166     Error *local_error = NULL;
4167     char *trace_file = NULL;
4168     int c;
4169     static const struct option long_options[] = {
4170         {"help", no_argument, 0, 'h'},
4171         {"version", no_argument, 0, 'V'},
4172         {"trace", required_argument, NULL, 'T'},
4173         {0, 0, 0, 0}
4174     };
4175 
4176 #ifdef CONFIG_POSIX
4177     signal(SIGPIPE, SIG_IGN);
4178 #endif
4179 
4180     module_call_init(MODULE_INIT_TRACE);
4181     error_set_progname(argv[0]);
4182     qemu_init_exec_dir(argv[0]);
4183 
4184     if (qemu_init_main_loop(&local_error)) {
4185         error_report_err(local_error);
4186         exit(EXIT_FAILURE);
4187     }
4188 
4189     qcrypto_init(&error_fatal);
4190 
4191     module_call_init(MODULE_INIT_QOM);
4192     bdrv_init();
4193     if (argc < 2) {
4194         error_exit("Not enough arguments");
4195     }
4196 
4197     qemu_add_opts(&qemu_object_opts);
4198     qemu_add_opts(&qemu_source_opts);
4199     qemu_add_opts(&qemu_trace_opts);
4200 
4201     while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
4202         switch (c) {
4203         case 'h':
4204             help();
4205             return 0;
4206         case 'V':
4207             printf(QEMU_IMG_VERSION);
4208             return 0;
4209         case 'T':
4210             g_free(trace_file);
4211             trace_file = trace_opt_parse(optarg);
4212             break;
4213         }
4214     }
4215 
4216     cmdname = argv[optind];
4217 
4218     /* reset getopt_long scanning */
4219     argc -= optind;
4220     if (argc < 1) {
4221         return 0;
4222     }
4223     argv += optind;
4224     optind = 0;
4225 
4226     if (!trace_init_backends()) {
4227         exit(1);
4228     }
4229     trace_init_file(trace_file);
4230     qemu_set_log(LOG_TRACE);
4231 
4232     /* find the command */
4233     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
4234         if (!strcmp(cmdname, cmd->name)) {
4235             return cmd->handler(argc, argv);
4236         }
4237     }
4238 
4239     /* not found */
4240     error_exit("Command not found: %s", cmdname);
4241 }
4242