/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "block/block_int-io.h"
#include "block/dirty-bitmap.h"
#include "block/reqlist.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
#include "qemu/co-shared-resource.h"
#include "qemu/coroutine.h"
#include "qemu/ratelimit.h"
#include "block/aio_task.h"
#include "qemu/error-report.h"
#include "qemu/memalign.h"

#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64
#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)

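/*
 * Copy methods (see block_copy_set_copy_opts() and block_copy_do_copy()
 * below for how the method is chosen and adjusted at run time):
 *
 *  COPY_READ_WRITE_CLUSTER -- buffered read+write, one cluster at a time
 *                             (used for compressed writes and when
 *                             max_transfer is smaller than the cluster size)
 *  COPY_READ_WRITE         -- buffered read+write with larger chunks
 *  COPY_WRITE_ZEROES       -- chosen per task when block status reports
 *                             BDRV_BLOCK_ZERO
 *  COPY_RANGE_SMALL        -- offloaded copy_range with a conservative chunk
 *                             size, until the first success
 *  COPY_RANGE_FULL         -- offloaded copy_range with full-size chunks
 */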
typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL
} BlockCopyMethod;

static coroutine_fn int block_copy_task_entry(AioTask *task);

typedef struct BlockCopyCallState {
    /* Fields initialized in block_copy_async() and never changed. */
    BlockCopyState *s;
    int64_t offset;
    int64_t bytes;
    int max_workers;
    int64_t max_chunk;
    bool ignore_ratelimit;
    BlockCopyAsyncCallbackFunc cb;
    void *cb_opaque;
    /* Coroutine where async block-copy is running */
    Coroutine *co;

    /* Fields whose state changes throughout the execution */
    bool finished; /* atomic */
    QemuCoSleep sleep; /* TODO: protect API with a lock */
    bool cancelled; /* atomic */
    /* To reference all call states from BlockCopyState */
    QLIST_ENTRY(BlockCopyCallState) list;

    /*
     * Fields that report information about return values and errors.
     * Protected by lock in BlockCopyState.
     */
    bool error_is_read;
    /*
     * @ret is set concurrently by tasks under mutex. Only set once by the
     * first failed task (and untouched if no task failed).
     * After finishing (call_state->finished is true), it is not modified
     * anymore and may be safely read without the mutex.
     */
    int ret;
} BlockCopyCallState;

typedef struct BlockCopyTask {
    AioTask task;

    /*
     * Fields initialized in block_copy_task_create()
     * and never changed.
     */
    BlockCopyState *s;
    BlockCopyCallState *call_state;
    /*
     * @method can also be set again in the while loop of
     * block_copy_dirty_clusters(), but it is never accessed concurrently
     * because the only other function that reads it is
     * block_copy_task_entry(), and it is invoked afterwards in the same
     * iteration.
     */
    BlockCopyMethod method;

    /*
     * Generally, req is protected by the lock in BlockCopyState. Still,
     * req.offset is only set on task creation, so it may be read
     * concurrently after creation. req.bytes is changed at most once, and
     * only the case of a parallel read while @bytes is being updated in
     * block_copy_task_shrink() needs protection.
     */
    BlockReq req;
} BlockCopyTask;

static int64_t task_end(BlockCopyTask *task)
{
    return task->req.offset + task->req.bytes;
}

typedef struct BlockCopyState {
    /*
     * BdrvChild objects are not owned or managed by block-copy. They are
     * provided by the block-copy user, who is responsible for appropriate
     * permissions on these children.
     */
    BdrvChild *source;
    BdrvChild *target;

    /*
     * Fields initialized in block_copy_state_new()
     * and never changed.
     */
    int64_t cluster_size;
    int64_t max_transfer;
    uint64_t len;
    BdrvRequestFlags write_flags;

    /*
     * Fields whose state changes throughout the execution.
     * Protected by lock.
     */
    CoMutex lock;
    int64_t in_flight_bytes;
    BlockCopyMethod method;
    BlockReqList reqs;
    QLIST_HEAD(, BlockCopyCallState) calls;
    /*
     * skip_unallocated:
     *
     * Used by sync=top jobs, which first scan the source node for unallocated
     * areas and clear them in the copy_bitmap.  During this process, the bitmap
     * is thus not fully initialized: It may still have bits set for areas that
     * are unallocated and should actually not be copied.
     *
     * This is indicated by skip_unallocated.
     *
     * In this case, block_copy() will query the source’s allocation status,
     * skip unallocated regions, clear them in the copy_bitmap, and invoke
     * block_copy_reset_unallocated() every time it does.
     */
    bool skip_unallocated; /* atomic */
    /* State fields that use a thread-safe API */
    BdrvDirtyBitmap *copy_bitmap;
    ProgressMeter *progress;
    SharedResource *mem;
    RateLimit rate_limit;
} BlockCopyState;

/* Called with lock held */
static int64_t block_copy_chunk_size(BlockCopyState *s)
{
    switch (s->method) {
    case COPY_READ_WRITE_CLUSTER:
        return s->cluster_size;
    case COPY_READ_WRITE:
    case COPY_RANGE_SMALL:
        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
                   s->max_transfer);
    case COPY_RANGE_FULL:
        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                   s->max_transfer);
    default:
        /* Cannot have COPY_WRITE_ZEROES here.  */
        abort();
    }
}
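
/*
 * Worked example for the chunk sizes above: with the default 64 KiB cluster
 * size and a max_transfer that does not get in the way, COPY_READ_WRITE and
 * COPY_RANGE_SMALL tasks use MIN(MAX(64 KiB, 1 MiB), max_transfer) = 1 MiB
 * chunks, while COPY_RANGE_FULL uses 16 MiB chunks.  A smaller max_transfer
 * (already aligned down to the cluster size in block_copy_state_new())
 * caps both.
 */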

/*
 * Search for the first dirty area in the offset/bytes range and create a
 * task at the beginning of it.
 */
static coroutine_fn BlockCopyTask *
block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
                       int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;
    int64_t max_chunk;

    QEMU_LOCK_GUARD(&s->lock);
    max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk);
    if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                           offset, offset + bytes,
                                           max_chunk, &offset, &bytes))
    {
        return NULL;
    }

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);

    /* The region is dirty, so no existing tasks are possible in it. */
    assert(!reqlist_find_conflict(&s->reqs, offset, bytes));

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    s->in_flight_bytes += bytes;

    task = g_new(BlockCopyTask, 1);
    *task = (BlockCopyTask) {
        .task.func = block_copy_task_entry,
        .s = s,
        .call_state = call_state,
        .method = s->method,
    };
    reqlist_init_req(&s->reqs, &task->req, offset, bytes);

    return task;
}

/*
 * block_copy_task_shrink
 *
 * Drop the tail of the task, to be handled later. Set the dirty bits back
 * and wake up all tasks waiting for us (maybe some of them do not intersect
 * with the shrunk task).
 */
static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                int64_t new_bytes)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    if (new_bytes == task->req.bytes) {
        return;
    }

    assert(new_bytes > 0 && new_bytes < task->req.bytes);

    task->s->in_flight_bytes -= task->req.bytes - new_bytes;
    bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                          task->req.offset + new_bytes,
                          task->req.bytes - new_bytes);

    reqlist_shrink_req(&task->req, new_bytes);
}

static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    task->s->in_flight_bytes -= task->req.bytes;
    if (ret < 0) {
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
                              task->req.bytes);
    }
    if (task->s->progress) {
        progress_set_remaining(task->s->progress,
                               bdrv_get_dirty_count(task->s->copy_bitmap) +
                               task->s->in_flight_bytes);
    }
    reqlist_remove_req(&task->req);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    ratelimit_destroy(&s->rate_limit);
    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}

static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
{
    return MIN_NON_ZERO(INT_MAX,
                        MIN_NON_ZERO(source->bs->bl.max_transfer,
                                     target->bs->bl.max_transfer));
}

void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
                              bool compress)
{
    /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
    s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
        (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);

    if (s->max_transfer < s->cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than the block-copy cluster size, so fall
         * back to buffered copying (read and write respect max_transfer on
         * their own).
         */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else if (compress) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else {
        /*
         * If copy range is enabled, start with COPY_RANGE_SMALL, until the
         * first successful copy_range (look at block_copy_do_copy).
         */
        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
    }
}
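
/*
 * To summarize the selection above:
 *
 *   max_transfer < cluster_size  ->  COPY_READ_WRITE_CLUSTER
 *   compress                     ->  COPY_READ_WRITE_CLUSTER
 *   use_copy_range               ->  COPY_RANGE_SMALL
 *   otherwise                    ->  COPY_READ_WRITE
 *
 * The conditions are checked in that order, so compression implies
 * cluster-sized buffered copying even when copy_range is requested.
 */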

static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
                                                 Error **errp)
{
    int ret;
    BlockDriverInfo bdi;
    bool target_does_cow;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    target_does_cow = bdrv_backing_chain_next(target);

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target_does_cow) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide "
                    "information about the block size and it doesn't have a "
                    "backing file. The default block size of %u bytes is "
                    "used. If the actual block size of the target exceeds "
                    "this default, the backup may be unusable",
                    BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target_does_cow) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        return ret;
    } else if (ret < 0 && target_does_cow) {
        /* Not fatal; just trudge on ahead. */
        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
    }

    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}

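/*
 * For example: a target whose driver reports 64 KiB clusters yields the
 * default 64 KiB block-copy cluster size, while one reporting 128 KiB
 * clusters raises it to 128 KiB; the result is never smaller than
 * BLOCK_COPY_CLUSTER_SIZE_DEFAULT.
 */
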
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     const BdrvDirtyBitmap *bitmap,
                                     Error **errp)
{
    ERRP_GUARD();
    BlockCopyState *s;
    int64_t cluster_size;
    BdrvDirtyBitmap *copy_bitmap;
    bool is_fleecing;

    GLOBAL_STATE_CODE();

    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
    if (cluster_size < 0) {
        return NULL;
    }

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);
    if (bitmap) {
        if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
            error_prepend(errp, "Failed to merge bitmap '%s' to internal "
                          "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
            bdrv_release_dirty_bitmap(copy_bitmap);
            return NULL;
        }
    } else {
        bdrv_set_dirty_bitmap(copy_bitmap, 0,
                              bdrv_dirty_bitmap_size(copy_bitmap));
    }

    /*
     * If the source is in the backing chain of the target, assume that the
     * target is going to be used for "image fleecing", i.e. it should
     * represent a kind of snapshot of the source at the backup-start point
     * in time, and the target is going to be read by somebody (for example,
     * used as an NBD export) during the backup job.
     *
     * In this case, we need to add the BDRV_REQ_SERIALISING write flag to
     * avoid intersection of backup writes and third-party reads from the
     * target; otherwise, when reading from the target, we may occasionally
     * read data already updated by the guest.
     *
     * For more information see commit f8d59dfb40bb and test
     * tests/qemu-iotests/222
     */
    bdrv_graph_rdlock_main_loop();
    is_fleecing = bdrv_chain_contains(target->bs, source->bs);
    bdrv_graph_rdunlock_main_loop();

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
        .max_transfer = QEMU_ALIGN_DOWN(
                                    block_copy_max_transfer(source, target),
                                    cluster_size),
    };

    block_copy_set_copy_opts(s, false, false);

    ratelimit_init(&s->rate_limit);
    qemu_co_mutex_init(&s->lock);
    QLIST_INIT(&s->reqs);
    QLIST_INIT(&s->calls);

    return s;
}
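
/*
 * A rough usage sketch, illustrative only: the real callers (e.g. the
 * backup job) may differ in details, and the names "job" and "errp" below
 * are placeholders:
 *
 *     BlockCopyState *s = block_copy_state_new(source, target, NULL, errp);
 *     if (!s) {
 *         ... handle error ...
 *     }
 *     block_copy_set_progress_meter(s, &job->progress);
 *     ... run copy requests, e.g. via block_copy_async() ...
 *     block_copy_state_free(s);
 */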

/* Only set before running the job, no need for locking. */
void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}

/*
 * Takes ownership of @task
 *
 * If pool is NULL, directly run the task; otherwise schedule it into the pool.
 *
 * Returns: task.func return code if pool is NULL
 *          otherwise -ECANCELED if pool status is bad
 *          otherwise 0 (successfully scheduled)
 */
static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
                                            BlockCopyTask *task)
{
    if (!pool) {
        int ret = task->task.func(&task->task);

        g_free(task);
        return ret;
    }

    aio_task_pool_wait_slot(pool);
    if (aio_task_pool_status(pool) < 0) {
        co_put_to_shres(task->s->mem, task->req.bytes);
        block_copy_task_end(task, -ECANCELED);
        g_free(task);
        return -ECANCELED;
    }

    aio_task_pool_start_task(pool, &task->task);

    return 0;
}

/*
 * block_copy_do_copy
 *
 * Copy a cluster-aligned chunk. The requested region is allowed to exceed
 * s->len only to cover the last cluster when s->len is not aligned to
 * clusters.
 *
 * No sync here: neither bitmap nor intersecting-request handling, only copy.
 *
 * @method is an in-out argument, so that copy_range can be either extended to
 * a full-size buffer or disabled if the copy_range attempt fails.  The output
 * value of @method should be used for subsequent tasks.
 * Returns 0 on success.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
                   BlockCopyMethod *method, bool *error_is_read)
{
    int ret;
    int64_t nbytes = MIN(offset + bytes, s->len) - offset;
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    switch (*method) {
    case COPY_WRITE_ZEROES:
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            *error_is_read = false;
        }
        return ret;

    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret >= 0) {
            /* Successful copy-range, increase chunk size.  */
            *method = COPY_RANGE_FULL;
            return 0;
        }

        trace_block_copy_copy_range_fail(s, offset, ret);
        *method = COPY_READ_WRITE;
        /* Fall through to read+write with allocated buffer */

    case COPY_READ_WRITE_CLUSTER:
    case COPY_READ_WRITE:
        /*
         * In case of a failed copy_range request above, we may proceed with
         * a buffered request larger than BLOCK_COPY_MAX_BUFFER.
         * Still, further requests will be properly limited, so don't care too
         * much. Moreover, the most likely case (copy_range is unsupported for
         * the configuration, so the very first copy_range request fails)
         * is handled by setting a large copy_size only after the first
         * successful copy_range.
         */

        bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

        ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
        if (ret < 0) {
            trace_block_copy_read_fail(s, offset, ret);
            *error_is_read = true;
            goto out;
        }

        ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                             s->write_flags);
        if (ret < 0) {
            trace_block_copy_write_fail(s, offset, ret);
            *error_is_read = false;
            goto out;
        }

    out:
        qemu_vfree(bounce_buffer);
        break;

    default:
        abort();
    }

    return ret;
}

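/*
 * The resulting @method transitions, as implemented above and applied back
 * to BlockCopyState in block_copy_task_entry():
 *
 *   COPY_RANGE_SMALL --(copy_range succeeds)--> COPY_RANGE_FULL
 *   COPY_RANGE_SMALL --(copy_range fails)-----> COPY_READ_WRITE
 *   COPY_RANGE_FULL  --(copy_range fails)-----> COPY_READ_WRITE
 *
 * So offloaded copying is retried with bigger chunks after the first
 * success, and buffered copying takes over for subsequent tasks after a
 * failure.
 */
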
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
    BlockCopyTask *t = container_of(task, BlockCopyTask, task);
    BlockCopyState *s = t->s;
    bool error_is_read = false;
    BlockCopyMethod method = t->method;
    int ret;

    WITH_GRAPH_RDLOCK_GUARD() {
        ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
                                 &error_is_read);
    }

    WITH_QEMU_LOCK_GUARD(&s->lock) {
        if (s->method == t->method) {
            s->method = method;
        }

        if (ret < 0) {
            if (!t->call_state->ret) {
                t->call_state->ret = ret;
                t->call_state->error_is_read = error_is_read;
            }
        } else if (s->progress) {
            progress_work_done(s->progress, t->req.bytes);
        }
    }
    co_put_to_shres(s->mem, t->req.bytes);
    block_copy_task_end(t, ret);

    return ret;
}

static coroutine_fn GRAPH_RDLOCK
int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
                            int64_t *pnum)
{
    int64_t num;
    BlockDriverState *base;
    int ret;

    if (qatomic_read(&s->skip_unallocated)) {
        base = bdrv_backing_chain_next(s->source->bs);
    } else {
        base = NULL;
    }

    ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
                                     NULL, NULL);
    if (ret < 0 || num < s->cluster_size) {
        /*
         * On error, or if we failed to obtain a large enough chunk, just
         * fall back to copying one cluster.
         */
        num = s->cluster_size;
        ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
    } else if (offset + num == s->len) {
        num = QEMU_ALIGN_UP(num, s->cluster_size);
    } else {
        num = QEMU_ALIGN_DOWN(num, s->cluster_size);
    }

    *pnum = num;
    return ret;
}

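/*
 * Note on the rounding above: in the middle of the image, the returned
 * extent is aligned down to the cluster size (e.g. 100 KiB of status with
 * 64 KiB clusters yields 64 KiB), while an extent touching the end of the
 * image is aligned up so that the final, possibly partial cluster is still
 * covered.
 */
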
/*
 * Check if the cluster starting at offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        /* protected in backup_run() */
        ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
{
    QEMU_LOCK_GUARD(&s->lock);

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    if (s->progress) {
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }
}

/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and a negative errno on error.
 */
int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
                                                  int64_t offset,
                                                  int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        block_copy_reset(s, offset, bytes);
    }

    *count = bytes;
    return ret;
}

7115332e5d2SVladimir Sementsov-Ogievskiy /*
7125332e5d2SVladimir Sementsov-Ogievskiy  * block_copy_dirty_clusters
7135332e5d2SVladimir Sementsov-Ogievskiy  *
7145332e5d2SVladimir Sementsov-Ogievskiy  * Copy dirty clusters in @offset/@bytes range.
7155332e5d2SVladimir Sementsov-Ogievskiy  * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
7165332e5d2SVladimir Sementsov-Ogievskiy  * clusters found and -errno on failure.
7175332e5d2SVladimir Sementsov-Ogievskiy  */
7187ff9579eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
block_copy_dirty_clusters(BlockCopyCallState * call_state)7193b8c2329SVladimir Sementsov-Ogievskiy block_copy_dirty_clusters(BlockCopyCallState *call_state)
720beb5f545SVladimir Sementsov-Ogievskiy {
7213b8c2329SVladimir Sementsov-Ogievskiy     BlockCopyState *s = call_state->s;
7223b8c2329SVladimir Sementsov-Ogievskiy     int64_t offset = call_state->offset;
7233b8c2329SVladimir Sementsov-Ogievskiy     int64_t bytes = call_state->bytes;
7243b8c2329SVladimir Sementsov-Ogievskiy 
725beb5f545SVladimir Sementsov-Ogievskiy     int ret = 0;
7265332e5d2SVladimir Sementsov-Ogievskiy     bool found_dirty = false;
72742ac2144SVladimir Sementsov-Ogievskiy     int64_t end = offset + bytes;
7284ce5dd3eSVladimir Sementsov-Ogievskiy     AioTaskPool *aio = NULL;
729beb5f545SVladimir Sementsov-Ogievskiy 
730beb5f545SVladimir Sementsov-Ogievskiy     /*
731beb5f545SVladimir Sementsov-Ogievskiy      * block_copy() user is responsible for keeping source and target in same
732beb5f545SVladimir Sementsov-Ogievskiy      * aio context
733beb5f545SVladimir Sementsov-Ogievskiy      */
73400e30f05SVladimir Sementsov-Ogievskiy     assert(bdrv_get_aio_context(s->source->bs) ==
73500e30f05SVladimir Sementsov-Ogievskiy            bdrv_get_aio_context(s->target->bs));
736beb5f545SVladimir Sementsov-Ogievskiy 
7378719091fSVladimir Sementsov-Ogievskiy     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
738dafaf135SVladimir Sementsov-Ogievskiy     assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
739beb5f545SVladimir Sementsov-Ogievskiy 
740149009beSEmanuele Giuseppe Esposito     while (bytes && aio_task_pool_status(aio) == 0 &&
741149009beSEmanuele Giuseppe Esposito            !qatomic_read(&call_state->cancelled)) {
7424ce5dd3eSVladimir Sementsov-Ogievskiy         BlockCopyTask *task;
74342ac2144SVladimir Sementsov-Ogievskiy         int64_t status_bytes;
744beb5f545SVladimir Sementsov-Ogievskiy 
7453b8c2329SVladimir Sementsov-Ogievskiy         task = block_copy_task_create(s, call_state, offset, bytes);
74642ac2144SVladimir Sementsov-Ogievskiy         if (!task) {
74742ac2144SVladimir Sementsov-Ogievskiy             /* No more dirty bits in the bitmap */
74842ac2144SVladimir Sementsov-Ogievskiy             trace_block_copy_skip_range(s, offset, bytes);
74942ac2144SVladimir Sementsov-Ogievskiy             break;
75042ac2144SVladimir Sementsov-Ogievskiy         }
751d088e6a4SVladimir Sementsov-Ogievskiy         if (task->req.offset > offset) {
752d088e6a4SVladimir Sementsov-Ogievskiy             trace_block_copy_skip_range(s, offset, task->req.offset - offset);
753beb5f545SVladimir Sementsov-Ogievskiy         }
754beb5f545SVladimir Sementsov-Ogievskiy 
7555332e5d2SVladimir Sementsov-Ogievskiy         found_dirty = true;
7565332e5d2SVladimir Sementsov-Ogievskiy 
757d088e6a4SVladimir Sementsov-Ogievskiy         ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
75842ac2144SVladimir Sementsov-Ogievskiy                                       &status_bytes);
7595332e5d2SVladimir Sementsov-Ogievskiy         assert(ret >= 0); /* never fail */
760d088e6a4SVladimir Sementsov-Ogievskiy         if (status_bytes < task->req.bytes) {
76142ac2144SVladimir Sementsov-Ogievskiy             block_copy_task_shrink(task, status_bytes);
76242ac2144SVladimir Sementsov-Ogievskiy         }
763d0c389d2SEmanuele Giuseppe Esposito         if (qatomic_read(&s->skip_unallocated) &&
764d0c389d2SEmanuele Giuseppe Esposito             !(ret & BDRV_BLOCK_ALLOCATED)) {
7651348a657SVladimir Sementsov-Ogievskiy             block_copy_task_end(task, 0);
766d088e6a4SVladimir Sementsov-Ogievskiy             trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
76742ac2144SVladimir Sementsov-Ogievskiy             offset = task_end(task);
76842ac2144SVladimir Sementsov-Ogievskiy             bytes = end - offset;
769fc9aefc8SVladimir Sementsov-Ogievskiy             g_free(task);
770beb5f545SVladimir Sementsov-Ogievskiy             continue;
771beb5f545SVladimir Sementsov-Ogievskiy         }
772bed95234SVladimir Sementsov-Ogievskiy         if (ret & BDRV_BLOCK_ZERO) {
77305d5e12bSPaolo Bonzini             task->method = COPY_WRITE_ZEROES;
774bed95234SVladimir Sementsov-Ogievskiy         }
7752d57511aSVladimir Sementsov-Ogievskiy 
7767e032df0SVladimir Sementsov-Ogievskiy         if (!call_state->ignore_ratelimit) {
7777e032df0SVladimir Sementsov-Ogievskiy             uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
7787e032df0SVladimir Sementsov-Ogievskiy             if (ns > 0) {
7797e032df0SVladimir Sementsov-Ogievskiy                 block_copy_task_end(task, -EAGAIN);
7807e032df0SVladimir Sementsov-Ogievskiy                 g_free(task);
78129a6ea24SPaolo Bonzini                 qemu_co_sleep_ns_wakeable(&call_state->sleep,
78229a6ea24SPaolo Bonzini                                           QEMU_CLOCK_REALTIME, ns);
7837e032df0SVladimir Sementsov-Ogievskiy                 continue;
7847e032df0SVladimir Sementsov-Ogievskiy             }
7857e032df0SVladimir Sementsov-Ogievskiy         }
7867e032df0SVladimir Sementsov-Ogievskiy 
787d088e6a4SVladimir Sementsov-Ogievskiy         ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);
7887e032df0SVladimir Sementsov-Ogievskiy 
789d088e6a4SVladimir Sementsov-Ogievskiy         trace_block_copy_process(s, task->req.offset);
790beb5f545SVladimir Sementsov-Ogievskiy 
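        /*
         * Charge this task against the shared in-flight memory budget
         * (bounded by BLOCK_COPY_MAX_MEM); the matching co_put_to_shres()
         * presumably happens once the task completes, throttling the total
         * number of bytes in flight.
         */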
791d088e6a4SVladimir Sementsov-Ogievskiy         co_get_from_shres(s->mem, task->req.bytes);
792beb5f545SVladimir Sementsov-Ogievskiy 
79342ac2144SVladimir Sementsov-Ogievskiy         offset = task_end(task);
79442ac2144SVladimir Sementsov-Ogievskiy         bytes = end - offset;
7954ce5dd3eSVladimir Sementsov-Ogievskiy 
7964ce5dd3eSVladimir Sementsov-Ogievskiy         if (!aio && bytes) {
79726be9d62SVladimir Sementsov-Ogievskiy             aio = aio_task_pool_new(call_state->max_workers);
798beb5f545SVladimir Sementsov-Ogievskiy         }
799beb5f545SVladimir Sementsov-Ogievskiy 
8004ce5dd3eSVladimir Sementsov-Ogievskiy         ret = block_copy_task_run(aio, task);
8014ce5dd3eSVladimir Sementsov-Ogievskiy         if (ret < 0) {
8024ce5dd3eSVladimir Sementsov-Ogievskiy             goto out;
8034ce5dd3eSVladimir Sementsov-Ogievskiy         }
8044ce5dd3eSVladimir Sementsov-Ogievskiy     }
8054ce5dd3eSVladimir Sementsov-Ogievskiy 
8064ce5dd3eSVladimir Sementsov-Ogievskiy out:
8074ce5dd3eSVladimir Sementsov-Ogievskiy     if (aio) {
8084ce5dd3eSVladimir Sementsov-Ogievskiy         aio_task_pool_wait_all(aio);
8094ce5dd3eSVladimir Sementsov-Ogievskiy 
8104ce5dd3eSVladimir Sementsov-Ogievskiy         /*
8114ce5dd3eSVladimir Sementsov-Ogievskiy          * We are not really interested in -ECANCELED returned from
8124ce5dd3eSVladimir Sementsov-Ogievskiy          * block_copy_task_run. If it fails, some task has already failed
8134ce5dd3eSVladimir Sementsov-Ogievskiy          * for a real reason, so let's return that first failure.
8144ce5dd3eSVladimir Sementsov-Ogievskiy          * Still, assert that we don't overwrite a failure with success.
815e8de7ba9SVladimir Sementsov-Ogievskiy          *
816e8de7ba9SVladimir Sementsov-Ogievskiy          * Note: ret may be positive here because of the block-status result.
8174ce5dd3eSVladimir Sementsov-Ogievskiy          */
818e8de7ba9SVladimir Sementsov-Ogievskiy         assert(ret >= 0 || aio_task_pool_status(aio) < 0);
8194ce5dd3eSVladimir Sementsov-Ogievskiy         ret = aio_task_pool_status(aio);
8204ce5dd3eSVladimir Sementsov-Ogievskiy 
8214ce5dd3eSVladimir Sementsov-Ogievskiy         aio_task_pool_free(aio);
8224ce5dd3eSVladimir Sementsov-Ogievskiy     }
8234ce5dd3eSVladimir Sementsov-Ogievskiy 
8244ce5dd3eSVladimir Sementsov-Ogievskiy     return ret < 0 ? ret : found_dirty;
8255332e5d2SVladimir Sementsov-Ogievskiy }
8265332e5d2SVladimir Sementsov-Ogievskiy 
8277e032df0SVladimir Sementsov-Ogievskiy void block_copy_kick(BlockCopyCallState *call_state)
8287e032df0SVladimir Sementsov-Ogievskiy {
82929a6ea24SPaolo Bonzini     qemu_co_sleep_wake(&call_state->sleep);
8307e032df0SVladimir Sementsov-Ogievskiy }
8317e032df0SVladimir Sementsov-Ogievskiy 
8325332e5d2SVladimir Sementsov-Ogievskiy /*
8333b8c2329SVladimir Sementsov-Ogievskiy  * block_copy_common
8345332e5d2SVladimir Sementsov-Ogievskiy  *
8355332e5d2SVladimir Sementsov-Ogievskiy  * Copy the requested region according to the dirty bitmap.
8365332e5d2SVladimir Sementsov-Ogievskiy  * Collaborate with parallel block_copy requests: if they succeed, that helps
8375332e5d2SVladimir Sementsov-Ogievskiy  * us. If they fail, we retry the not-yet-copied regions. So, if we return an
8385332e5d2SVladimir Sementsov-Ogievskiy  * error, it means that some I/O operation failed in the context of _this_
8395332e5d2SVladimir Sementsov-Ogievskiy  * block_copy call, not of some parallel operation.
8405332e5d2SVladimir Sementsov-Ogievskiy  */
8417ff9579eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
8427ff9579eSKevin Wolf block_copy_common(BlockCopyCallState *call_state)
8435332e5d2SVladimir Sementsov-Ogievskiy {
8445332e5d2SVladimir Sementsov-Ogievskiy     int ret;
845c6a3e3dfSEmanuele Giuseppe Esposito     BlockCopyState *s = call_state->s;
8465332e5d2SVladimir Sementsov-Ogievskiy 
847d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_lock(&s->lock);
848c6a3e3dfSEmanuele Giuseppe Esposito     QLIST_INSERT_HEAD(&s->calls, call_state, list);
849d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_unlock(&s->lock);
8502e099a9dSVladimir Sementsov-Ogievskiy 
8515332e5d2SVladimir Sementsov-Ogievskiy     do {
8523b8c2329SVladimir Sementsov-Ogievskiy         ret = block_copy_dirty_clusters(call_state);
8535332e5d2SVladimir Sementsov-Ogievskiy 
854149009beSEmanuele Giuseppe Esposito         if (ret == 0 && !qatomic_read(&call_state->cancelled)) {
855d0c389d2SEmanuele Giuseppe Esposito             WITH_QEMU_LOCK_GUARD(&s->lock) {
856d0c389d2SEmanuele Giuseppe Esposito                 /*
857d0c389d2SEmanuele Giuseppe Esposito                  * Check that there is no task we still need to
858d0c389d2SEmanuele Giuseppe Esposito                  * wait for to complete
859d0c389d2SEmanuele Giuseppe Esposito                  */
860d088e6a4SVladimir Sementsov-Ogievskiy                 ret = reqlist_wait_one(&s->reqs, call_state->offset,
861d088e6a4SVladimir Sementsov-Ogievskiy                                        call_state->bytes, &s->lock);
862d0c389d2SEmanuele Giuseppe Esposito                 if (ret == 0) {
863d0c389d2SEmanuele Giuseppe Esposito                     /*
864d0c389d2SEmanuele Giuseppe Esposito                      * No pending tasks, but check again the bitmap in this
865d0c389d2SEmanuele Giuseppe Esposito                      * same critical section, since a task might have failed
866d0c389d2SEmanuele Giuseppe Esposito                      * between this and the critical section in
867d0c389d2SEmanuele Giuseppe Esposito                      * block_copy_dirty_clusters().
868d0c389d2SEmanuele Giuseppe Esposito                      *
869d088e6a4SVladimir Sementsov-Ogievskiy                      * A reqlist_wait_one() return value of 0 also means that
870d0c389d2SEmanuele Giuseppe Esposito                      * it didn't release the lock. So we are still in the same
871d0c389d2SEmanuele Giuseppe Esposito                      * critical section, not interrupted by any concurrent
872d0c389d2SEmanuele Giuseppe Esposito                      * access to the state.
873d0c389d2SEmanuele Giuseppe Esposito                      */
874d0c389d2SEmanuele Giuseppe Esposito                     ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap,
875d0c389d2SEmanuele Giuseppe Esposito                                                        call_state->offset,
876d0c389d2SEmanuele Giuseppe Esposito                                                        call_state->bytes) >= 0;
877d0c389d2SEmanuele Giuseppe Esposito                 }
878d0c389d2SEmanuele Giuseppe Esposito             }
8795332e5d2SVladimir Sementsov-Ogievskiy         }
8805332e5d2SVladimir Sementsov-Ogievskiy 
8815332e5d2SVladimir Sementsov-Ogievskiy         /*
8825332e5d2SVladimir Sementsov-Ogievskiy          * We retry in two cases:
8835332e5d2SVladimir Sementsov-Ogievskiy          * 1. Some progress was made.
8845332e5d2SVladimir Sementsov-Ogievskiy          *    Something was copied, which means that there were yield points
8855332e5d2SVladimir Sementsov-Ogievskiy          *    and some new dirty bits may have appeared (due to failed parallel
8865332e5d2SVladimir Sementsov-Ogievskiy          *    block-copy requests).
8875332e5d2SVladimir Sementsov-Ogievskiy          * 2. We have waited for some intersecting block-copy request.
8885332e5d2SVladimir Sementsov-Ogievskiy          *    It may have failed and produced new dirty bits.
8895332e5d2SVladimir Sementsov-Ogievskiy          */
890149009beSEmanuele Giuseppe Esposito     } while (ret > 0 && !qatomic_read(&call_state->cancelled));
891a6ffe199SVladimir Sementsov-Ogievskiy 
892149009beSEmanuele Giuseppe Esposito     qatomic_store_release(&call_state->finished, true);
893de4641b4SVladimir Sementsov-Ogievskiy 
894de4641b4SVladimir Sementsov-Ogievskiy     if (call_state->cb) {
895de4641b4SVladimir Sementsov-Ogievskiy         call_state->cb(call_state->cb_opaque);
896de4641b4SVladimir Sementsov-Ogievskiy     }
897de4641b4SVladimir Sementsov-Ogievskiy 
898d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_lock(&s->lock);
8992e099a9dSVladimir Sementsov-Ogievskiy     QLIST_REMOVE(call_state, list);
900d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_unlock(&s->lock);
9012e099a9dSVladimir Sementsov-Ogievskiy 
902beb5f545SVladimir Sementsov-Ogievskiy     return ret;
903beb5f545SVladimir Sementsov-Ogievskiy }
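
/*
 * A sketch of the collaboration contract documented above (hypothetical
 * callers; only block_copy() itself is real): two coroutines may copy
 * overlapping ranges concurrently, and each reports an error only for I/O
 * that failed on behalf of its own call:
 *
 *     // coroutine A                      // coroutine B
 *     block_copy(s, 0, 2 * chunk, ...);   block_copy(s, chunk, 2 * chunk, ...);
 *
 * The overlapping region is copied once; whichever call arrives second is
 * expected to wait via reqlist_wait_one() and retries only if the first
 * copy failed.
 */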
904397f4e9dSVladimir Sementsov-Ogievskiy 
90515df6e69SVladimir Sementsov-Ogievskiy static void coroutine_fn block_copy_async_co_entry(void *opaque)
9063b8c2329SVladimir Sementsov-Ogievskiy {
9077ff9579eSKevin Wolf     GRAPH_RDLOCK_GUARD();
90815df6e69SVladimir Sementsov-Ogievskiy     block_copy_common(opaque);
90915df6e69SVladimir Sementsov-Ogievskiy }
91015df6e69SVladimir Sementsov-Ogievskiy 
91115df6e69SVladimir Sementsov-Ogievskiy int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
91215df6e69SVladimir Sementsov-Ogievskiy                             bool ignore_ratelimit, uint64_t timeout_ns,
91315df6e69SVladimir Sementsov-Ogievskiy                             BlockCopyAsyncCallbackFunc cb,
91415df6e69SVladimir Sementsov-Ogievskiy                             void *cb_opaque)
91515df6e69SVladimir Sementsov-Ogievskiy {
91615df6e69SVladimir Sementsov-Ogievskiy     int ret;
91715df6e69SVladimir Sementsov-Ogievskiy     BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
91815df6e69SVladimir Sementsov-Ogievskiy 
91915df6e69SVladimir Sementsov-Ogievskiy     *call_state = (BlockCopyCallState) {
9203b8c2329SVladimir Sementsov-Ogievskiy         .s = s,
9213b8c2329SVladimir Sementsov-Ogievskiy         .offset = start,
9223b8c2329SVladimir Sementsov-Ogievskiy         .bytes = bytes,
9237e032df0SVladimir Sementsov-Ogievskiy         .ignore_ratelimit = ignore_ratelimit,
92426be9d62SVladimir Sementsov-Ogievskiy         .max_workers = BLOCK_COPY_MAX_WORKERS,
92515df6e69SVladimir Sementsov-Ogievskiy         .cb = cb,
92615df6e69SVladimir Sementsov-Ogievskiy         .cb_opaque = cb_opaque,
9273b8c2329SVladimir Sementsov-Ogievskiy     };
9283b8c2329SVladimir Sementsov-Ogievskiy 
92915df6e69SVladimir Sementsov-Ogievskiy     ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns,
93015df6e69SVladimir Sementsov-Ogievskiy                           g_free);
93115df6e69SVladimir Sementsov-Ogievskiy     if (ret < 0) {
93215df6e69SVladimir Sementsov-Ogievskiy         assert(ret == -ETIMEDOUT);
93315df6e69SVladimir Sementsov-Ogievskiy         block_copy_call_cancel(call_state);
93415df6e69SVladimir Sementsov-Ogievskiy         /* call_state will be freed by the running coroutine. */
93515df6e69SVladimir Sementsov-Ogievskiy         return ret;
9363b8c2329SVladimir Sementsov-Ogievskiy     }
9373b8c2329SVladimir Sementsov-Ogievskiy 
93815df6e69SVladimir Sementsov-Ogievskiy     ret = call_state->ret;
93915df6e69SVladimir Sementsov-Ogievskiy     g_free(call_state);
94015df6e69SVladimir Sementsov-Ogievskiy 
94115df6e69SVladimir Sementsov-Ogievskiy     return ret;
942de4641b4SVladimir Sementsov-Ogievskiy }
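
/*
 * A minimal synchronous usage sketch (hypothetical caller; bcs and len are
 * illustrative names): run a bounded copy from coroutine context.
 *
 *     // coroutine context, graph read-locked
 *     ret = block_copy(bcs, 0, len, false,
 *                      10 * NANOSECONDS_PER_SECOND, NULL, NULL);
 *     if (ret == -ETIMEDOUT) {
 *         // the copy was cancelled after timeout_ns; the call state is
 *         // freed by the still-running coroutine (see above)
 *     }
 */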
943de4641b4SVladimir Sementsov-Ogievskiy 
944de4641b4SVladimir Sementsov-Ogievskiy BlockCopyCallState *block_copy_async(BlockCopyState *s,
945de4641b4SVladimir Sementsov-Ogievskiy                                      int64_t offset, int64_t bytes,
94626be9d62SVladimir Sementsov-Ogievskiy                                      int max_workers, int64_t max_chunk,
947de4641b4SVladimir Sementsov-Ogievskiy                                      BlockCopyAsyncCallbackFunc cb,
948de4641b4SVladimir Sementsov-Ogievskiy                                      void *cb_opaque)
949de4641b4SVladimir Sementsov-Ogievskiy {
950de4641b4SVladimir Sementsov-Ogievskiy     BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
951de4641b4SVladimir Sementsov-Ogievskiy 
952de4641b4SVladimir Sementsov-Ogievskiy     *call_state = (BlockCopyCallState) {
953de4641b4SVladimir Sementsov-Ogievskiy         .s = s,
954de4641b4SVladimir Sementsov-Ogievskiy         .offset = offset,
955de4641b4SVladimir Sementsov-Ogievskiy         .bytes = bytes,
95626be9d62SVladimir Sementsov-Ogievskiy         .max_workers = max_workers,
95726be9d62SVladimir Sementsov-Ogievskiy         .max_chunk = max_chunk,
958de4641b4SVladimir Sementsov-Ogievskiy         .cb = cb,
959de4641b4SVladimir Sementsov-Ogievskiy         .cb_opaque = cb_opaque,
960de4641b4SVladimir Sementsov-Ogievskiy 
961de4641b4SVladimir Sementsov-Ogievskiy         .co = qemu_coroutine_create(block_copy_async_co_entry, call_state),
962de4641b4SVladimir Sementsov-Ogievskiy     };
963de4641b4SVladimir Sementsov-Ogievskiy 
964de4641b4SVladimir Sementsov-Ogievskiy     qemu_coroutine_enter(call_state->co);
965de4641b4SVladimir Sementsov-Ogievskiy 
966de4641b4SVladimir Sementsov-Ogievskiy     return call_state;
967de4641b4SVladimir Sementsov-Ogievskiy }
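
/*
 * A hypothetical asynchronous usage sketch (my_cb and my_state are
 * illustrative names): start a background copy and get notified on
 * completion.
 *
 *     static void my_cb(void *opaque)
 *     {
 *         // runs when the copy finishes; typically schedules a BH that
 *         // inspects and frees the call state
 *     }
 *
 *     BlockCopyCallState *cs =
 *         block_copy_async(bcs, offset, bytes,
 *                          BLOCK_COPY_MAX_WORKERS, 0, my_cb, my_state);
 *
 * max_chunk == 0 is assumed here to leave the chunk size unlimited.
 */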
968de4641b4SVladimir Sementsov-Ogievskiy 
969de4641b4SVladimir Sementsov-Ogievskiy void block_copy_call_free(BlockCopyCallState *call_state)
970de4641b4SVladimir Sementsov-Ogievskiy {
971de4641b4SVladimir Sementsov-Ogievskiy     if (!call_state) {
972de4641b4SVladimir Sementsov-Ogievskiy         return;
973de4641b4SVladimir Sementsov-Ogievskiy     }
974de4641b4SVladimir Sementsov-Ogievskiy 
975149009beSEmanuele Giuseppe Esposito     assert(qatomic_read(&call_state->finished));
976de4641b4SVladimir Sementsov-Ogievskiy     g_free(call_state);
977de4641b4SVladimir Sementsov-Ogievskiy }
978de4641b4SVladimir Sementsov-Ogievskiy 
979de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_finished(BlockCopyCallState *call_state)
980de4641b4SVladimir Sementsov-Ogievskiy {
981149009beSEmanuele Giuseppe Esposito     return qatomic_read(&call_state->finished);
982de4641b4SVladimir Sementsov-Ogievskiy }
983de4641b4SVladimir Sementsov-Ogievskiy 
984de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_succeeded(BlockCopyCallState *call_state)
985de4641b4SVladimir Sementsov-Ogievskiy {
986149009beSEmanuele Giuseppe Esposito     return qatomic_load_acquire(&call_state->finished) &&
987149009beSEmanuele Giuseppe Esposito            !qatomic_read(&call_state->cancelled) &&
988a6d23d56SVladimir Sementsov-Ogievskiy            call_state->ret == 0;
989de4641b4SVladimir Sementsov-Ogievskiy }
990de4641b4SVladimir Sementsov-Ogievskiy 
991de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_failed(BlockCopyCallState *call_state)
992de4641b4SVladimir Sementsov-Ogievskiy {
993149009beSEmanuele Giuseppe Esposito     return qatomic_load_acquire(&call_state->finished) &&
994149009beSEmanuele Giuseppe Esposito            !qatomic_read(&call_state->cancelled) &&
995a6d23d56SVladimir Sementsov-Ogievskiy            call_state->ret < 0;
996a6d23d56SVladimir Sementsov-Ogievskiy }
997a6d23d56SVladimir Sementsov-Ogievskiy 
998a6d23d56SVladimir Sementsov-Ogievskiy bool block_copy_call_cancelled(BlockCopyCallState *call_state)
999a6d23d56SVladimir Sementsov-Ogievskiy {
1000149009beSEmanuele Giuseppe Esposito     return qatomic_read(&call_state->cancelled);
1001de4641b4SVladimir Sementsov-Ogievskiy }
1002de4641b4SVladimir Sementsov-Ogievskiy 
1003de4641b4SVladimir Sementsov-Ogievskiy int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
1004de4641b4SVladimir Sementsov-Ogievskiy {
1005149009beSEmanuele Giuseppe Esposito     assert(qatomic_load_acquire(&call_state->finished));
1006de4641b4SVladimir Sementsov-Ogievskiy     if (error_is_read) {
1007de4641b4SVladimir Sementsov-Ogievskiy         *error_is_read = call_state->error_is_read;
1008de4641b4SVladimir Sementsov-Ogievskiy     }
1009de4641b4SVladimir Sementsov-Ogievskiy     return call_state->ret;
1010de4641b4SVladimir Sementsov-Ogievskiy }
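
/*
 * A polling sketch using the query helpers above (hypothetical caller):
 *
 *     while (!block_copy_call_finished(cs)) {
 *         aio_poll(qemu_get_aio_context(), true);
 *     }
 *     if (block_copy_call_failed(cs)) {
 *         bool error_is_read;
 *         int ret = block_copy_call_status(cs, &error_is_read);
 *         // report ret / error_is_read
 *     }
 *     block_copy_call_free(cs);
 */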
1011de4641b4SVladimir Sementsov-Ogievskiy 
1012149009beSEmanuele Giuseppe Esposito /*
1013149009beSEmanuele Giuseppe Esposito  * Note that cancelling and finishing are racy:
1014149009beSEmanuele Giuseppe Esposito  * the user can cancel a block-copy call that has already finished.
1015149009beSEmanuele Giuseppe Esposito  */
1016a6d23d56SVladimir Sementsov-Ogievskiy void block_copy_call_cancel(BlockCopyCallState *call_state)
1017a6d23d56SVladimir Sementsov-Ogievskiy {
1018149009beSEmanuele Giuseppe Esposito     qatomic_set(&call_state->cancelled, true);
1019a6d23d56SVladimir Sementsov-Ogievskiy     block_copy_kick(call_state);
1020a6d23d56SVladimir Sementsov-Ogievskiy }
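
/*
 * Because cancelling and finishing are racy (see the note above), a caller
 * that cancels should still wait for completion before freeing (sketch):
 *
 *     block_copy_call_cancel(cs);
 *     while (!block_copy_call_finished(cs)) {
 *         // poll or yield until the coroutine marks the call finished
 *     }
 *     block_copy_call_free(cs);
 */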
1021a6d23d56SVladimir Sementsov-Ogievskiy 
1022397f4e9dSVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
1023397f4e9dSVladimir Sementsov-Ogievskiy {
1024397f4e9dSVladimir Sementsov-Ogievskiy     return s->copy_bitmap;
1025397f4e9dSVladimir Sementsov-Ogievskiy }
1026397f4e9dSVladimir Sementsov-Ogievskiy 
1027b518e9e9SVladimir Sementsov-Ogievskiy int64_t block_copy_cluster_size(BlockCopyState *s)
1028b518e9e9SVladimir Sementsov-Ogievskiy {
1029b518e9e9SVladimir Sementsov-Ogievskiy     return s->cluster_size;
1030b518e9e9SVladimir Sementsov-Ogievskiy }
1031b518e9e9SVladimir Sementsov-Ogievskiy 
1032397f4e9dSVladimir Sementsov-Ogievskiy void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
1033397f4e9dSVladimir Sementsov-Ogievskiy {
1034d0c389d2SEmanuele Giuseppe Esposito     qatomic_set(&s->skip_unallocated, skip);
1035397f4e9dSVladimir Sementsov-Ogievskiy }
10367e032df0SVladimir Sementsov-Ogievskiy 
10377e032df0SVladimir Sementsov-Ogievskiy void block_copy_set_speed(BlockCopyState *s, uint64_t speed)
10387e032df0SVladimir Sementsov-Ogievskiy {
10397e032df0SVladimir Sementsov-Ogievskiy     ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
10407e032df0SVladimir Sementsov-Ogievskiy 
10417e032df0SVladimir Sementsov-Ogievskiy     /*
10427e032df0SVladimir Sementsov-Ogievskiy      * Note: it would be good to kick all call states from here, but that must
10437e032df0SVladimir Sementsov-Ogievskiy      * be done only from a coroutine, so as not to crash if the s->calls list
10447e032df0SVladimir Sementsov-Ogievskiy      * changes while entering one call. So for now, the only user of this
10457e032df0SVladimir Sementsov-Ogievskiy      * function kicks its single call_state by hand.
10467e032df0SVladimir Sementsov-Ogievskiy      */
10477e032df0SVladimir Sementsov-Ogievskiy }
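
/*
 * Sketch of the by-hand kick mentioned in the comment above (hypothetical
 * caller with a single call state):
 *
 *     block_copy_set_speed(bcs, new_speed);
 *     if (cs) {
 *         block_copy_kick(cs);   // wake a copy sleeping on the old limit
 *     }
 */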
1048