xref: /qemu/block/commit.c (revision 4b028cbe)
/*
 * Live block commit
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Jeff Cody   <jcody@redhat.com>
 *  Based on stream.c by Stefan Hajnoczi
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
14747ff602SJeff Cody 
1580c71a24SPeter Maydell #include "qemu/osdep.h"
16dcbf37ceSKevin Wolf #include "qemu/cutils.h"
17747ff602SJeff Cody #include "trace.h"
18737e150eSPaolo Bonzini #include "block/block_int.h"
19c87621eaSJohn Snow #include "block/blockjob_int.h"
20da34e65cSMarkus Armbruster #include "qapi/error.h"
21747ff602SJeff Cody #include "qemu/ratelimit.h"
225df022cfSPeter Maydell #include "qemu/memalign.h"
23373340b2SMax Reitz #include "sysemu/block-backend.h"
24747ff602SJeff Cody 
enum {
    /*
     * Size of the bounce buffer used by the commit job, and the upper bound
     * on how many bytes a single loop iteration examines and copies.  It is
     * meant to be big enough that several clusters are handled per call, so
     * that long contiguous allocated regions are copied efficiently.
     */
    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
};
33747ff602SJeff Cody 
34747ff602SJeff Cody typedef struct CommitBlockJob {
35747ff602SJeff Cody     BlockJob common;
368dfba279SKevin Wolf     BlockDriverState *commit_top_bs;
374653456aSKevin Wolf     BlockBackend *top;
384653456aSKevin Wolf     BlockBackend *base;
3922dffcbeSJohn Snow     BlockDriverState *base_bs;
409a71b9deSMax Reitz     BlockDriverState *base_overlay;
4192aa5c6dSPaolo Bonzini     BlockdevOnError on_error;
42e70cdc57SAlberto Garcia     bool base_read_only;
43df827336SAlberto Garcia     bool chain_frozen;
4454e26900SJeff Cody     char *backing_file_str;
454b028cbeSPeter Krempa     bool backing_mask_protocol;
46747ff602SJeff Cody } CommitBlockJob;
47747ff602SJeff Cody 
/*
 * Job .prepare callback.
 *
 * Unfreezes the backing chain, releases the job's BlockBackend on base and
 * then asks bdrv_drop_intermediate() to remove the nodes between the commit
 * filter and base.
 *
 * Returns the result of bdrv_drop_intermediate().
 */
static int commit_prepare(Job *job)
{
    CommitBlockJob *cjob = container_of(job, CommitBlockJob, common.job);
    int ret;

    bdrv_graph_rdlock_main_loop();
    bdrv_unfreeze_backing_chain(cjob->commit_top_bs, cjob->base_bs);
    cjob->chain_frozen = false;
    bdrv_graph_rdunlock_main_loop();

    /*
     * The base BlockBackend is a parent that still holds
     * BLK_PERM_WRITE/RESIZE; it has to go away before the normal backing
     * chain can be restored.
     */
    blk_unref(cjob->base);
    cjob->base = NULL;

    /*
     * FIXME: bdrv_drop_intermediate treats total failures and partial
     * failures identically. Further work is needed to disambiguate these
     * cases.
     */
    ret = bdrv_drop_intermediate(cjob->commit_top_bs, cjob->base_bs,
                                 cjob->backing_file_str,
                                 cjob->backing_mask_protocol);
    return ret;
}
6822dffcbeSJohn Snow 
/*
 * Job .abort callback.
 *
 * Undoes the graph changes made by commit_start(): unfreezes the chain if it
 * is still frozen, drops the base BlockBackend and the job's node blockers,
 * and finally replaces the commit filter node by its backing node.
 */
static void commit_abort(Job *job)
{
    CommitBlockJob *cjob = container_of(job, CommitBlockJob, common.job);
    BlockDriverState *top_node = blk_bs(cjob->top);
    BlockDriverState *filter_backing;

    if (cjob->chain_frozen) {
        bdrv_graph_rdlock_main_loop();
        bdrv_unfreeze_backing_chain(cjob->commit_top_bs, cjob->base_bs);
        bdrv_graph_rdunlock_main_loop();
    }

    /* Hold both nodes alive until bdrv_replace_node() below has run */
    bdrv_ref(top_node);
    bdrv_ref(cjob->commit_top_bs);

    if (cjob->base) {
        blk_unref(cjob->base);
    }

    /*
     * Release the blockers on the intermediate nodes, otherwise
     * bdrv_replace_node() cannot succeed.
     */
    block_job_remove_all_bdrv(&cjob->common);

    /*
     * If bdrv_drop_intermediate() failed (or was not invoked), the commit
     * filter driver is still in the backing chain and is removed now. This
     * is the final step so that the 'consistent read' permission can be
     * granted.
     *
     * XXX Can (or should) we somehow keep 'consistent read' blocked even
     * after the failed/cancelled commit job is gone? If we already wrote
     * something to base, the intermediate images aren't valid any more.
     */
    bdrv_graph_rdlock_main_loop();
    filter_backing = cjob->commit_top_bs->backing->bs;
    bdrv_graph_rdunlock_main_loop();

    bdrv_drained_begin(filter_backing);
    bdrv_graph_wrlock();
    bdrv_replace_node(cjob->commit_top_bs, filter_backing, &error_abort);
    bdrv_graph_wrunlock();
    bdrv_drained_end(filter_backing);

    bdrv_unref(cjob->commit_top_bs);
    bdrv_unref(top_node);
}
1139e85cd5cSStefan Hajnoczi 
/*
 * Job .clean callback.
 *
 * Switches base back to read-only if it was read-only before the job
 * started, then releases the backing file string and the top BlockBackend.
 */
static void commit_clean(Job *job)
{
    CommitBlockJob *cjob = container_of(job, CommitBlockJob, common.job);

    /*
     * Restore the original open flags of base where needed (i.e. make it
     * r/o again). The reopen does not have to be atomic: a failure here is
     * ignored and never aborts anything.
     */
    if (cjob->base_read_only) {
        bdrv_reopen_set_read_only(cjob->base_bs, true, NULL);
    }

    g_free(cjob->backing_file_str);
    blk_unref(cjob->top);
}
1289e85cd5cSStefan Hajnoczi 
commit_run(Job * job,Error ** errp)129f67432a2SJohn Snow static int coroutine_fn commit_run(Job *job, Error **errp)
1309e85cd5cSStefan Hajnoczi {
131f67432a2SJohn Snow     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
132317a6676SEric Blake     int64_t offset;
133747ff602SJeff Cody     int ret = 0;
13451b0a488SEric Blake     int64_t n = 0; /* bytes */
13571701708SVladimir Sementsov-Ogievskiy     QEMU_AUTO_VFREE void *buf = NULL;
13605df8a6aSKevin Wolf     int64_t len, base_len;
137747ff602SJeff Cody 
138c86422c5SEmanuele Giuseppe Esposito     len = blk_co_getlength(s->top);
13905df8a6aSKevin Wolf     if (len < 0) {
14071701708SVladimir Sementsov-Ogievskiy         return len;
141747ff602SJeff Cody     }
14230a5c887SKevin Wolf     job_progress_set_remaining(&s->common.job, len);
143747ff602SJeff Cody 
144c86422c5SEmanuele Giuseppe Esposito     base_len = blk_co_getlength(s->base);
145747ff602SJeff Cody     if (base_len < 0) {
14671701708SVladimir Sementsov-Ogievskiy         return base_len;
147747ff602SJeff Cody     }
148747ff602SJeff Cody 
14905df8a6aSKevin Wolf     if (base_len < len) {
150a0667887SAlberto Faria         ret = blk_co_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
151747ff602SJeff Cody         if (ret) {
15271701708SVladimir Sementsov-Ogievskiy             return ret;
153747ff602SJeff Cody         }
154747ff602SJeff Cody     }
155747ff602SJeff Cody 
1564653456aSKevin Wolf     buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
157747ff602SJeff Cody 
15805df8a6aSKevin Wolf     for (offset = 0; offset < len; offset += n) {
159747ff602SJeff Cody         bool copy;
1609ad1e79fSKevin Wolf         bool error_in_source = true;
161747ff602SJeff Cody 
162747ff602SJeff Cody         /* Note that even when no rate limit is applied we need to yield
163c57b6656SKevin Wolf          * with no pending I/O here so that bdrv_drain_all() returns.
164747ff602SJeff Cody          */
165018e5987SKevin Wolf         block_job_ratelimit_sleep(&s->common);
166daa7f2f9SKevin Wolf         if (job_is_cancelled(&s->common.job)) {
167747ff602SJeff Cody             break;
168747ff602SJeff Cody         }
169747ff602SJeff Cody         /* Copy if allocated above the base */
170ff7e261bSEmanuele Giuseppe Esposito         ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
17151b0a488SEric Blake                                         offset, COMMIT_BUFFER_SIZE, &n);
172a92b1b06SEric Blake         copy = (ret > 0);
17351b0a488SEric Blake         trace_commit_one_iteration(s, offset, n, ret);
174747ff602SJeff Cody         if (copy) {
1750c42e175SKevin Wolf             assert(n < SIZE_MAX);
1760c42e175SKevin Wolf 
1770c42e175SKevin Wolf             ret = blk_co_pread(s->top, offset, n, buf, 0);
1780c42e175SKevin Wolf             if (ret >= 0) {
1790c42e175SKevin Wolf                 ret = blk_co_pwrite(s->base, offset, n, buf, 0);
1809ad1e79fSKevin Wolf                 if (ret < 0) {
1819ad1e79fSKevin Wolf                     error_in_source = false;
1829ad1e79fSKevin Wolf                 }
1830c42e175SKevin Wolf             }
184747ff602SJeff Cody         }
185747ff602SJeff Cody         if (ret < 0) {
1861e8fb7f1SKevin Wolf             BlockErrorAction action =
1879ad1e79fSKevin Wolf                 block_job_error_action(&s->common, s->on_error,
1889ad1e79fSKevin Wolf                                        error_in_source, -ret);
1891e8fb7f1SKevin Wolf             if (action == BLOCK_ERROR_ACTION_REPORT) {
19071701708SVladimir Sementsov-Ogievskiy                 return ret;
191747ff602SJeff Cody             } else {
192747ff602SJeff Cody                 n = 0;
193747ff602SJeff Cody                 continue;
194747ff602SJeff Cody             }
195747ff602SJeff Cody         }
196747ff602SJeff Cody         /* Publish progress */
19730a5c887SKevin Wolf         job_progress_update(&s->common.job, n);
198f14a39ccSSascha Silbe 
199dee81d51SKevin Wolf         if (copy) {
200018e5987SKevin Wolf             block_job_ratelimit_processed_bytes(&s->common, n);
201f14a39ccSSascha Silbe         }
202747ff602SJeff Cody     }
203747ff602SJeff Cody 
20471701708SVladimir Sementsov-Ogievskiy     return 0;
205747ff602SJeff Cody }
206747ff602SJeff Cody 
2073fc4b10aSFam Zheng static const BlockJobDriver commit_job_driver = {
20833e9e9bdSKevin Wolf     .job_driver = {
209747ff602SJeff Cody         .instance_size = sizeof(CommitBlockJob),
2108e4c8700SKevin Wolf         .job_type      = JOB_TYPE_COMMIT,
21180fa2c75SKevin Wolf         .free          = block_job_free,
212b15de828SKevin Wolf         .user_resume   = block_job_user_resume,
213f67432a2SJohn Snow         .run           = commit_run,
21422dffcbeSJohn Snow         .prepare       = commit_prepare,
21522dffcbeSJohn Snow         .abort         = commit_abort,
21622dffcbeSJohn Snow         .clean         = commit_clean
217da01ff7fSKevin Wolf     },
218747ff602SJeff Cody };
219747ff602SJeff Cody 
220b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
bdrv_commit_top_preadv(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)221b9b10c35SKevin Wolf bdrv_commit_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
222b9b10c35SKevin Wolf                        QEMUIOVector *qiov, BdrvRequestFlags flags)
2238dfba279SKevin Wolf {
2248dfba279SKevin Wolf     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
2258dfba279SKevin Wolf }
2268dfba279SKevin Wolf 
bdrv_commit_top_refresh_filename(BlockDriverState * bs)227004915a9SKevin Wolf static GRAPH_RDLOCK void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
228dcbf37ceSKevin Wolf {
229dcbf37ceSKevin Wolf     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
230dcbf37ceSKevin Wolf             bs->backing->bs->filename);
231dcbf37ceSKevin Wolf }
23291965658SKevin Wolf 
/*
 * Permission callback of the commit_top filter.  Regardless of the inputs it
 * requests no permissions on the child (*nperm = 0) and shares all of them
 * (*nshared = BLK_PERM_ALL).
 */
static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
                                       BdrvChildRole role,
                                       BlockReopenQueue *reopen_queue,
                                       uint64_t perm, uint64_t shared,
                                       uint64_t *nperm, uint64_t *nshared)
{
    *nshared = BLK_PERM_ALL;
    *nperm = 0;
}
2428dfba279SKevin Wolf 
2438dfba279SKevin Wolf /* Dummy node that provides consistent read to its users without requiring it
2448dfba279SKevin Wolf  * from its backing file and that allows writes on the backing file chain. */
2458dfba279SKevin Wolf static BlockDriver bdrv_commit_top = {
2468dfba279SKevin Wolf     .format_name                = "commit_top",
2478dfba279SKevin Wolf     .bdrv_co_preadv             = bdrv_commit_top_preadv,
248dcbf37ceSKevin Wolf     .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
2498dfba279SKevin Wolf     .bdrv_child_perm            = bdrv_commit_top_child_perm,
2506540fd15SMax Reitz 
2516540fd15SMax Reitz     .is_filter                  = true,
252046fd84fSVladimir Sementsov-Ogievskiy     .filtered_child_is_backing  = true,
2538dfba279SKevin Wolf };
2548dfba279SKevin Wolf 
commit_start(const char * job_id,BlockDriverState * bs,BlockDriverState * base,BlockDriverState * top,int creation_flags,int64_t speed,BlockdevOnError on_error,const char * backing_file_str,bool backing_mask_protocol,const char * filter_node_name,Error ** errp)255fd62c609SAlberto Garcia void commit_start(const char *job_id, BlockDriverState *bs,
2565360782dSJohn Snow                   BlockDriverState *base, BlockDriverState *top,
2575360782dSJohn Snow                   int creation_flags, int64_t speed,
2588254b6d9SJohn Snow                   BlockdevOnError on_error, const char *backing_file_str,
2594b028cbeSPeter Krempa                   bool backing_mask_protocol,
2600db832f4SKevin Wolf                   const char *filter_node_name, Error **errp)
261747ff602SJeff Cody {
262747ff602SJeff Cody     CommitBlockJob *s;
2633e4c5122SAlberto Garcia     BlockDriverState *iter;
2648dfba279SKevin Wolf     BlockDriverState *commit_top_bs = NULL;
2659a71b9deSMax Reitz     BlockDriverState *filtered_base;
2669a71b9deSMax Reitz     int64_t base_size, top_size;
2679a71b9deSMax Reitz     uint64_t base_perms, iter_shared_perms;
268d7086422SKevin Wolf     int ret;
269747ff602SJeff Cody 
270b4ad82aaSEmanuele Giuseppe Esposito     GLOBAL_STATE_CODE();
271b4ad82aaSEmanuele Giuseppe Esposito 
27218da7f94SFam Zheng     assert(top != bs);
273ad74751fSKevin Wolf     bdrv_graph_rdlock_main_loop();
2749a71b9deSMax Reitz     if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
275747ff602SJeff Cody         error_setg(errp, "Invalid files for merge: top and base are the same");
276ad74751fSKevin Wolf         bdrv_graph_rdunlock_main_loop();
277747ff602SJeff Cody         return;
278747ff602SJeff Cody     }
279ad74751fSKevin Wolf     bdrv_graph_rdunlock_main_loop();
280747ff602SJeff Cody 
2819a71b9deSMax Reitz     base_size = bdrv_getlength(base);
2829a71b9deSMax Reitz     if (base_size < 0) {
2839a71b9deSMax Reitz         error_setg_errno(errp, -base_size, "Could not inquire base image size");
2849a71b9deSMax Reitz         return;
2859a71b9deSMax Reitz     }
2869a71b9deSMax Reitz 
2879a71b9deSMax Reitz     top_size = bdrv_getlength(top);
2889a71b9deSMax Reitz     if (top_size < 0) {
2899a71b9deSMax Reitz         error_setg_errno(errp, -top_size, "Could not inquire top image size");
2909a71b9deSMax Reitz         return;
2919a71b9deSMax Reitz     }
2929a71b9deSMax Reitz 
2939a71b9deSMax Reitz     base_perms = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
2949a71b9deSMax Reitz     if (base_size < top_size) {
2959a71b9deSMax Reitz         base_perms |= BLK_PERM_RESIZE;
2969a71b9deSMax Reitz     }
2979a71b9deSMax Reitz 
29875859b94SJohn Snow     s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
2995360782dSJohn Snow                          speed, creation_flags, NULL, NULL, errp);
300834fe28dSAlberto Garcia     if (!s) {
301834fe28dSAlberto Garcia         return;
302834fe28dSAlberto Garcia     }
303834fe28dSAlberto Garcia 
304bde70715SKevin Wolf     /* convert base to r/w, if necessary */
305e70cdc57SAlberto Garcia     s->base_read_only = bdrv_is_read_only(base);
306e70cdc57SAlberto Garcia     if (s->base_read_only) {
307e70cdc57SAlberto Garcia         if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
308d7086422SKevin Wolf             goto fail;
309747ff602SJeff Cody         }
310747ff602SJeff Cody     }
311747ff602SJeff Cody 
3128dfba279SKevin Wolf     /* Insert commit_top block node above top, so we can block consistent read
3138dfba279SKevin Wolf      * on the backing chain below it */
3140db832f4SKevin Wolf     commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
3150db832f4SKevin Wolf                                          errp);
3168dfba279SKevin Wolf     if (commit_top_bs == NULL) {
3178dfba279SKevin Wolf         goto fail;
3188dfba279SKevin Wolf     }
319d3c8c674SKevin Wolf     if (!filter_node_name) {
320d3c8c674SKevin Wolf         commit_top_bs->implicit = true;
321d3c8c674SKevin Wolf     }
322e5182c1cSMax Reitz 
323e5182c1cSMax Reitz     /* So that we can always drop this node */
324e5182c1cSMax Reitz     commit_top_bs->never_freeze = true;
325e5182c1cSMax Reitz 
3260d0676a1SKevin Wolf     commit_top_bs->total_sectors = top->total_sectors;
3278dfba279SKevin Wolf 
328934aee14SVladimir Sementsov-Ogievskiy     ret = bdrv_append(commit_top_bs, top, errp);
329ae9d4417SVladimir Sementsov-Ogievskiy     bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
330934aee14SVladimir Sementsov-Ogievskiy     if (ret < 0) {
331b69f00ddSFam Zheng         commit_top_bs = NULL;
332b69f00ddSFam Zheng         goto fail;
333b69f00ddSFam Zheng     }
3348dfba279SKevin Wolf 
3358dfba279SKevin Wolf     s->commit_top_bs = commit_top_bs;
336747ff602SJeff Cody 
3379a71b9deSMax Reitz     /*
3389a71b9deSMax Reitz      * Block all nodes between top and base, because they will
3399a71b9deSMax Reitz      * disappear from the chain after this operation.
3409a71b9deSMax Reitz      * Note that this assumes that the user is fine with removing all
3419a71b9deSMax Reitz      * nodes (including R/W filters) between top and base.  Assuring
3429a71b9deSMax Reitz      * this is the responsibility of the interface (i.e. whoever calls
3439a71b9deSMax Reitz      * commit_start()).
3449a71b9deSMax Reitz      */
3456bc30f19SStefan Hajnoczi     bdrv_graph_wrlock();
3469a71b9deSMax Reitz     s->base_overlay = bdrv_find_overlay(top, base);
3479a71b9deSMax Reitz     assert(s->base_overlay);
3489a71b9deSMax Reitz 
3499a71b9deSMax Reitz     /*
3509a71b9deSMax Reitz      * The topmost node with
3519a71b9deSMax Reitz      * bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base)
3529a71b9deSMax Reitz      */
3539a71b9deSMax Reitz     filtered_base = bdrv_cow_bs(s->base_overlay);
3549a71b9deSMax Reitz     assert(bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base));
3559a71b9deSMax Reitz 
3569a71b9deSMax Reitz     /*
3579a71b9deSMax Reitz      * XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
3588dfba279SKevin Wolf      * at s->base (if writes are blocked for a node, they are also blocked
3598dfba279SKevin Wolf      * for its backing file). The other options would be a second filter
3609a71b9deSMax Reitz      * driver above s->base.
3619a71b9deSMax Reitz      */
3629a71b9deSMax Reitz     iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
3639a71b9deSMax Reitz 
3649a71b9deSMax Reitz     for (iter = top; iter != base; iter = bdrv_filter_or_cow_bs(iter)) {
3659a71b9deSMax Reitz         if (iter == filtered_base) {
3669a71b9deSMax Reitz             /*
3679a71b9deSMax Reitz              * From here on, all nodes are filters on the base.  This
3689a71b9deSMax Reitz              * allows us to share BLK_PERM_CONSISTENT_READ.
3699a71b9deSMax Reitz              */
3709a71b9deSMax Reitz             iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
3719a71b9deSMax Reitz         }
3729a71b9deSMax Reitz 
3738dfba279SKevin Wolf         ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
3749a71b9deSMax Reitz                                  iter_shared_perms, errp);
3758dfba279SKevin Wolf         if (ret < 0) {
3766bc30f19SStefan Hajnoczi             bdrv_graph_wrunlock();
3778dfba279SKevin Wolf             goto fail;
3783e4c5122SAlberto Garcia         }
3793e4c5122SAlberto Garcia     }
3803e4c5122SAlberto Garcia 
381df827336SAlberto Garcia     if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
3826bc30f19SStefan Hajnoczi         bdrv_graph_wrunlock();
383df827336SAlberto Garcia         goto fail;
384df827336SAlberto Garcia     }
385df827336SAlberto Garcia     s->chain_frozen = true;
386df827336SAlberto Garcia 
3878dfba279SKevin Wolf     ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
3886bc30f19SStefan Hajnoczi     bdrv_graph_wrunlock();
389f3bbc53dSKevin Wolf 
3908dfba279SKevin Wolf     if (ret < 0) {
3918dfba279SKevin Wolf         goto fail;
3928dfba279SKevin Wolf     }
3938dfba279SKevin Wolf 
394d861ab3aSKevin Wolf     s->base = blk_new(s->common.job.aio_context,
3959a71b9deSMax Reitz                       base_perms,
3968dfba279SKevin Wolf                       BLK_PERM_CONSISTENT_READ
3978dfba279SKevin Wolf                       | BLK_PERM_WRITE_UNCHANGED);
398d7086422SKevin Wolf     ret = blk_insert_bs(s->base, base, errp);
399d7086422SKevin Wolf     if (ret < 0) {
400d7086422SKevin Wolf         goto fail;
401d7086422SKevin Wolf     }
402cf312932SKevin Wolf     blk_set_disable_request_queuing(s->base, true);
40322dffcbeSJohn Snow     s->base_bs = base;
4044653456aSKevin Wolf 
4058dfba279SKevin Wolf     /* Required permissions are already taken with block_job_add_bdrv() */
406d861ab3aSKevin Wolf     s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
407b247767aSKevin Wolf     ret = blk_insert_bs(s->top, top, errp);
408d7086422SKevin Wolf     if (ret < 0) {
409d7086422SKevin Wolf         goto fail;
410d7086422SKevin Wolf     }
411cf312932SKevin Wolf     blk_set_disable_request_queuing(s->top, true);
4124653456aSKevin Wolf 
41354e26900SJeff Cody     s->backing_file_str = g_strdup(backing_file_str);
4144b028cbeSPeter Krempa     s->backing_mask_protocol = backing_mask_protocol;
415747ff602SJeff Cody     s->on_error = on_error;
416747ff602SJeff Cody 
4175ccac6f1SJohn Snow     trace_commit_start(bs, base, top, s);
418da01ff7fSKevin Wolf     job_start(&s->common.job);
419d7086422SKevin Wolf     return;
420d7086422SKevin Wolf 
421d7086422SKevin Wolf fail:
422df827336SAlberto Garcia     if (s->chain_frozen) {
4239275fc72SKevin Wolf         bdrv_graph_rdlock_main_loop();
424df827336SAlberto Garcia         bdrv_unfreeze_backing_chain(commit_top_bs, base);
4259275fc72SKevin Wolf         bdrv_graph_rdunlock_main_loop();
426df827336SAlberto Garcia     }
427d7086422SKevin Wolf     if (s->base) {
428d7086422SKevin Wolf         blk_unref(s->base);
429d7086422SKevin Wolf     }
430d7086422SKevin Wolf     if (s->top) {
431d7086422SKevin Wolf         blk_unref(s->top);
432d7086422SKevin Wolf     }
433065abf9fSAlberto Garcia     if (s->base_read_only) {
434065abf9fSAlberto Garcia         bdrv_reopen_set_read_only(base, true, NULL);
435065abf9fSAlberto Garcia     }
4362468eed3SAlberto Garcia     job_early_fail(&s->common.job);
4372468eed3SAlberto Garcia     /* commit_top_bs has to be replaced after deleting the block job,
4382468eed3SAlberto Garcia      * otherwise this would fail because of lack of permissions. */
4398dfba279SKevin Wolf     if (commit_top_bs) {
440ccd6a379SKevin Wolf         bdrv_drained_begin(top);
4416bc30f19SStefan Hajnoczi         bdrv_graph_wrlock();
442bde70715SKevin Wolf         bdrv_replace_node(commit_top_bs, top, &error_abort);
4436bc30f19SStefan Hajnoczi         bdrv_graph_wrunlock();
444ccd6a379SKevin Wolf         bdrv_drained_end(top);
4458dfba279SKevin Wolf     }
446747ff602SJeff Cody }
44783fd6dd3SKevin Wolf 
44883fd6dd3SKevin Wolf 
/* Bounce buffer size used by bdrv_commit(): 2048 sectors, in bytes */
#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
45083fd6dd3SKevin Wolf 
45183fd6dd3SKevin Wolf /* commit COW file into the raw image */
bdrv_commit(BlockDriverState * bs)45283fd6dd3SKevin Wolf int bdrv_commit(BlockDriverState *bs)
45383fd6dd3SKevin Wolf {
454f8e2bd53SKevin Wolf     BlockBackend *src, *backing;
455d3f06759SKevin Wolf     BlockDriverState *backing_file_bs = NULL;
456d3f06759SKevin Wolf     BlockDriverState *commit_top_bs = NULL;
45783fd6dd3SKevin Wolf     BlockDriver *drv = bs->drv;
458d861ab3aSKevin Wolf     AioContext *ctx;
459d6a644bbSEric Blake     int64_t offset, length, backing_length;
460c742a364SAlberto Garcia     int ro;
461d6a644bbSEric Blake     int64_t n;
46283fd6dd3SKevin Wolf     int ret = 0;
46371701708SVladimir Sementsov-Ogievskiy     QEMU_AUTO_VFREE uint8_t *buf = NULL;
464d3f06759SKevin Wolf     Error *local_err = NULL;
46583fd6dd3SKevin Wolf 
466f791bf7fSEmanuele Giuseppe Esposito     GLOBAL_STATE_CODE();
467277f2007SKevin Wolf     GRAPH_RDLOCK_GUARD_MAINLOOP();
468f791bf7fSEmanuele Giuseppe Esposito 
46983fd6dd3SKevin Wolf     if (!drv)
47083fd6dd3SKevin Wolf         return -ENOMEDIUM;
47183fd6dd3SKevin Wolf 
4729a71b9deSMax Reitz     backing_file_bs = bdrv_cow_bs(bs);
4739a71b9deSMax Reitz 
4749a71b9deSMax Reitz     if (!backing_file_bs) {
47583fd6dd3SKevin Wolf         return -ENOTSUP;
47683fd6dd3SKevin Wolf     }
47783fd6dd3SKevin Wolf 
47883fd6dd3SKevin Wolf     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
4799a71b9deSMax Reitz         bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL))
4809a71b9deSMax Reitz     {
48183fd6dd3SKevin Wolf         return -EBUSY;
48283fd6dd3SKevin Wolf     }
48383fd6dd3SKevin Wolf 
484307261b2SVladimir Sementsov-Ogievskiy     ro = bdrv_is_read_only(backing_file_bs);
48583fd6dd3SKevin Wolf 
48683fd6dd3SKevin Wolf     if (ro) {
4879a71b9deSMax Reitz         if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
48883fd6dd3SKevin Wolf             return -EACCES;
48983fd6dd3SKevin Wolf         }
49083fd6dd3SKevin Wolf     }
49183fd6dd3SKevin Wolf 
492d861ab3aSKevin Wolf     ctx = bdrv_get_aio_context(bs);
4932d97fde4SMax Reitz     /* WRITE_UNCHANGED is required for bdrv_make_empty() */
4942d97fde4SMax Reitz     src = blk_new(ctx, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
4952d97fde4SMax Reitz                   BLK_PERM_ALL);
496d861ab3aSKevin Wolf     backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
497d7086422SKevin Wolf 
498d3f06759SKevin Wolf     ret = blk_insert_bs(src, bs, &local_err);
499d7086422SKevin Wolf     if (ret < 0) {
500d3f06759SKevin Wolf         error_report_err(local_err);
501d7086422SKevin Wolf         goto ro_cleanup;
502d7086422SKevin Wolf     }
503d7086422SKevin Wolf 
504d3f06759SKevin Wolf     /* Insert commit_top block node above backing, so we can write to it */
505d3f06759SKevin Wolf     commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
506d3f06759SKevin Wolf                                          &local_err);
507d3f06759SKevin Wolf     if (commit_top_bs == NULL) {
508d3f06759SKevin Wolf         error_report_err(local_err);
509d3f06759SKevin Wolf         goto ro_cleanup;
510d3f06759SKevin Wolf     }
511d3f06759SKevin Wolf 
51212fa4af6SKevin Wolf     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
51312fa4af6SKevin Wolf     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
514d3f06759SKevin Wolf 
515d3f06759SKevin Wolf     ret = blk_insert_bs(backing, backing_file_bs, &local_err);
516d7086422SKevin Wolf     if (ret < 0) {
517d3f06759SKevin Wolf         error_report_err(local_err);
518d7086422SKevin Wolf         goto ro_cleanup;
519d7086422SKevin Wolf     }
520f8e2bd53SKevin Wolf 
521f8e2bd53SKevin Wolf     length = blk_getlength(src);
52283fd6dd3SKevin Wolf     if (length < 0) {
52383fd6dd3SKevin Wolf         ret = length;
52483fd6dd3SKevin Wolf         goto ro_cleanup;
52583fd6dd3SKevin Wolf     }
52683fd6dd3SKevin Wolf 
527f8e2bd53SKevin Wolf     backing_length = blk_getlength(backing);
52883fd6dd3SKevin Wolf     if (backing_length < 0) {
52983fd6dd3SKevin Wolf         ret = backing_length;
53083fd6dd3SKevin Wolf         goto ro_cleanup;
53183fd6dd3SKevin Wolf     }
53283fd6dd3SKevin Wolf 
53383fd6dd3SKevin Wolf     /* If our top snapshot is larger than the backing file image,
53483fd6dd3SKevin Wolf      * grow the backing file image if possible.  If not possible,
53583fd6dd3SKevin Wolf      * we must return an error */
53683fd6dd3SKevin Wolf     if (length > backing_length) {
5378c6242b6SKevin Wolf         ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
538c80d8b06SMax Reitz                            &local_err);
53983fd6dd3SKevin Wolf         if (ret < 0) {
540ed3d2ec9SMax Reitz             error_report_err(local_err);
54183fd6dd3SKevin Wolf             goto ro_cleanup;
54283fd6dd3SKevin Wolf         }
54383fd6dd3SKevin Wolf     }
54483fd6dd3SKevin Wolf 
545f8e2bd53SKevin Wolf     /* blk_try_blockalign() for src will choose an alignment that works for
546f8e2bd53SKevin Wolf      * backing as well, so no need to compare the alignment manually. */
547d6a644bbSEric Blake     buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
54883fd6dd3SKevin Wolf     if (buf == NULL) {
54983fd6dd3SKevin Wolf         ret = -ENOMEM;
55083fd6dd3SKevin Wolf         goto ro_cleanup;
55183fd6dd3SKevin Wolf     }
55283fd6dd3SKevin Wolf 
553d6a644bbSEric Blake     for (offset = 0; offset < length; offset += n) {
554d6a644bbSEric Blake         ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
55583fd6dd3SKevin Wolf         if (ret < 0) {
55683fd6dd3SKevin Wolf             goto ro_cleanup;
55783fd6dd3SKevin Wolf         }
55883fd6dd3SKevin Wolf         if (ret) {
559a9262f55SAlberto Faria             ret = blk_pread(src, offset, n, buf, 0);
56083fd6dd3SKevin Wolf             if (ret < 0) {
56183fd6dd3SKevin Wolf                 goto ro_cleanup;
56283fd6dd3SKevin Wolf             }
56383fd6dd3SKevin Wolf 
564a9262f55SAlberto Faria             ret = blk_pwrite(backing, offset, n, buf, 0);
56583fd6dd3SKevin Wolf             if (ret < 0) {
56683fd6dd3SKevin Wolf                 goto ro_cleanup;
56783fd6dd3SKevin Wolf             }
56883fd6dd3SKevin Wolf         }
56983fd6dd3SKevin Wolf     }
57083fd6dd3SKevin Wolf 
5712d97fde4SMax Reitz     ret = blk_make_empty(src, NULL);
5722d97fde4SMax Reitz     /* Ignore -ENOTSUP */
5732d97fde4SMax Reitz     if (ret < 0 && ret != -ENOTSUP) {
57483fd6dd3SKevin Wolf         goto ro_cleanup;
57583fd6dd3SKevin Wolf     }
5762d97fde4SMax Reitz 
577f8e2bd53SKevin Wolf     blk_flush(src);
57883fd6dd3SKevin Wolf 
57983fd6dd3SKevin Wolf     /*
58083fd6dd3SKevin Wolf      * Make sure all data we wrote to the backing device is actually
58183fd6dd3SKevin Wolf      * stable on disk.
58283fd6dd3SKevin Wolf      */
583f8e2bd53SKevin Wolf     blk_flush(backing);
58483fd6dd3SKevin Wolf 
58583fd6dd3SKevin Wolf     ret = 0;
58683fd6dd3SKevin Wolf ro_cleanup:
587f8e2bd53SKevin Wolf     blk_unref(backing);
5889a71b9deSMax Reitz     if (bdrv_cow_bs(bs) != backing_file_bs) {
58912fa4af6SKevin Wolf         bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
590d3f06759SKevin Wolf     }
591d3f06759SKevin Wolf     bdrv_unref(commit_top_bs);
592d3f06759SKevin Wolf     blk_unref(src);
593f8e2bd53SKevin Wolf 
59483fd6dd3SKevin Wolf     if (ro) {
59583fd6dd3SKevin Wolf         /* ignoring error return here */
5969a71b9deSMax Reitz         bdrv_reopen_set_read_only(backing_file_bs, true, NULL);
59783fd6dd3SKevin Wolf     }
59883fd6dd3SKevin Wolf 
59983fd6dd3SKevin Wolf     return ret;
60083fd6dd3SKevin Wolf }
601