1beb5f545SVladimir Sementsov-Ogievskiy /*
2beb5f545SVladimir Sementsov-Ogievskiy * block_copy API
3beb5f545SVladimir Sementsov-Ogievskiy *
4beb5f545SVladimir Sementsov-Ogievskiy * Copyright (C) 2013 Proxmox Server Solutions
5beb5f545SVladimir Sementsov-Ogievskiy * Copyright (c) 2019 Virtuozzo International GmbH.
6beb5f545SVladimir Sementsov-Ogievskiy *
7beb5f545SVladimir Sementsov-Ogievskiy * Authors:
8beb5f545SVladimir Sementsov-Ogievskiy * Dietmar Maurer (dietmar@proxmox.com)
9beb5f545SVladimir Sementsov-Ogievskiy * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10beb5f545SVladimir Sementsov-Ogievskiy *
11beb5f545SVladimir Sementsov-Ogievskiy * This work is licensed under the terms of the GNU GPL, version 2 or later.
12beb5f545SVladimir Sementsov-Ogievskiy * See the COPYING file in the top-level directory.
13beb5f545SVladimir Sementsov-Ogievskiy */
14beb5f545SVladimir Sementsov-Ogievskiy
15beb5f545SVladimir Sementsov-Ogievskiy #include "qemu/osdep.h"
16beb5f545SVladimir Sementsov-Ogievskiy
17beb5f545SVladimir Sementsov-Ogievskiy #include "trace.h"
18beb5f545SVladimir Sementsov-Ogievskiy #include "qapi/error.h"
19beb5f545SVladimir Sementsov-Ogievskiy #include "block/block-copy.h"
20e2c1c34fSMarkus Armbruster #include "block/block_int-io.h"
21e2c1c34fSMarkus Armbruster #include "block/dirty-bitmap.h"
22d088e6a4SVladimir Sementsov-Ogievskiy #include "block/reqlist.h"
23beb5f545SVladimir Sementsov-Ogievskiy #include "sysemu/block-backend.h"
24b3b7036aSVladimir Sementsov-Ogievskiy #include "qemu/units.h"
25e2c1c34fSMarkus Armbruster #include "qemu/co-shared-resource.h"
264ce5dd3eSVladimir Sementsov-Ogievskiy #include "qemu/coroutine.h"
27e2c1c34fSMarkus Armbruster #include "qemu/ratelimit.h"
284ce5dd3eSVladimir Sementsov-Ogievskiy #include "block/aio_task.h"
29b518e9e9SVladimir Sementsov-Ogievskiy #include "qemu/error-report.h"
305df022cfSPeter Maydell #include "qemu/memalign.h"
31b3b7036aSVladimir Sementsov-Ogievskiy
32b3b7036aSVladimir Sementsov-Ogievskiy #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
330e240245SVladimir Sementsov-Ogievskiy #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
347f739d0eSVladimir Sementsov-Ogievskiy #define BLOCK_COPY_MAX_MEM (128 * MiB)
354ce5dd3eSVladimir Sementsov-Ogievskiy #define BLOCK_COPY_MAX_WORKERS 64
367e032df0SVladimir Sementsov-Ogievskiy #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
37b518e9e9SVladimir Sementsov-Ogievskiy #define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)
384ce5dd3eSVladimir Sementsov-Ogievskiy
/*
 * How a single chunk is transferred to the target.  Stored per-state
 * (BlockCopyState.method) and snapshotted into each task on creation.
 */
typedef enum {
    /*
     * Buffered read+write restricted to cluster-size chunks; used when
     * compressing or when max_transfer is smaller than the cluster size
     * (see block_copy_set_copy_opts()).
     */
    COPY_READ_WRITE_CLUSTER,
    /* Buffered read+write with larger chunks (see block_copy_chunk_size()). */
    COPY_READ_WRITE,
    /*
     * Region known to read as zeroes: written via bdrv_co_pwrite_zeroes()
     * only.  Never stored in BlockCopyState.method (block_copy_chunk_size()
     * aborts on it).
     */
    COPY_WRITE_ZEROES,
    /*
     * Offloaded copy_range with buffer-sized chunks; the starting mode when
     * copy_range is requested, until the first success (see comment in
     * block_copy_set_copy_opts()).
     */
    COPY_RANGE_SMALL,
    /* Offloaded copy_range with full-size chunks (BLOCK_COPY_MAX_COPY_RANGE). */
    COPY_RANGE_FULL
} BlockCopyMethod;
4605d5e12bSPaolo Bonzini
474ce5dd3eSVladimir Sementsov-Ogievskiy static coroutine_fn int block_copy_task_entry(AioTask *task);
484ce5dd3eSVladimir Sementsov-Ogievskiy
/*
 * State of one block_copy() / block_copy_async() call.  All live call states
 * are linked into BlockCopyState.calls.
 */
typedef struct BlockCopyCallState {
    /* Fields initialized in block_copy_async() and never changed. */
    BlockCopyState *s;
    int64_t offset;
    int64_t bytes;
    int max_workers;
    /* Per-call chunk limit; 0 means "no extra limit" (see MIN_NON_ZERO use
     * in block_copy_task_create()). */
    int64_t max_chunk;
    bool ignore_ratelimit;
    BlockCopyAsyncCallbackFunc cb;
    void *cb_opaque;
    /* Coroutine where async block-copy is running */
    Coroutine *co;

    /* Fields whose state changes throughout the execution */
    bool finished; /* atomic */
    QemuCoSleep sleep; /* TODO: protect API with a lock */
    bool cancelled; /* atomic */
    /* To reference all call states from BlockCopyState */
    QLIST_ENTRY(BlockCopyCallState) list;

    /*
     * Fields that report information about return values and errors.
     * Protected by lock in BlockCopyState.
     */
    bool error_is_read;
    /*
     * @ret is set concurrently by tasks under mutex. Only set once by first
     * failed task (and untouched if no task failed).
     * After finishing (call_state->finished is true), it is not modified
     * anymore and may be safely read without mutex.
     */
    int ret;
} BlockCopyCallState;
82beb5f545SVladimir Sementsov-Ogievskiy
/*
 * One in-flight copy of a single chunk.  Allocated by
 * block_copy_task_create() and freed by block_copy_task_run() / its caller.
 */
typedef struct BlockCopyTask {
    /* Embedded AioTask; &task->task is what the aio task pool executes
     * (see block_copy_task_run()). */
    AioTask task;

    /*
     * Fields initialized in block_copy_task_create()
     * and never changed.
     */
    BlockCopyState *s;
    BlockCopyCallState *call_state;
    /*
     * @method can also be set again in the while loop of
     * block_copy_dirty_clusters(), but it is never accessed concurrently
     * because the only other function that reads it is
     * block_copy_task_entry() and it is invoked afterwards in the same
     * iteration.
     */
    BlockCopyMethod method;

    /*
     * Generally, req is protected by lock in BlockCopyState, Still req.offset
     * is only set on task creation, so may be read concurrently after creation.
     * req.bytes is changed at most once, and need only protecting the case of
     * parallel read while updating @bytes value in block_copy_task_shrink().
     */
    BlockReq req;
} BlockCopyTask;
109397f4e9dSVladimir Sementsov-Ogievskiy
/* Return the offset of the first byte just past @task's request. */
static int64_t task_end(BlockCopyTask *task)
{
    int64_t end_offset = task->req.offset + task->req.bytes;

    return end_offset;
}
11442ac2144SVladimir Sementsov-Ogievskiy
typedef struct BlockCopyState {
    /*
     * BdrvChild objects are not owned or managed by block-copy. They are
     * provided by block-copy user and user is responsible for appropriate
     * permissions on these children.
     */
    BdrvChild *source;
    BdrvChild *target;

    /*
     * Fields initialized in block_copy_state_new()
     * and never changed.
     */
    int64_t cluster_size;
    /* Common max_transfer of source and target, aligned down to cluster_size */
    int64_t max_transfer;
    uint64_t len;
    BdrvRequestFlags write_flags;

    /*
     * Fields whose state changes throughout the execution
     * Protected by lock.
     */
    CoMutex lock;
    /* Bytes covered by running tasks; incremented in block_copy_task_create()
     * and dropped in block_copy_task_end()/block_copy_task_shrink(). */
    int64_t in_flight_bytes;
    BlockCopyMethod method;
    /* In-flight requests, used to detect/avoid conflicting tasks */
    BlockReqList reqs;
    /* All active block_copy() call states (see BlockCopyCallState.list) */
    QLIST_HEAD(, BlockCopyCallState) calls;
    /*
     * skip_unallocated:
     *
     * Used by sync=top jobs, which first scan the source node for unallocated
     * areas and clear them in the copy_bitmap. During this process, the bitmap
     * is thus not fully initialized: It may still have bits set for areas that
     * are unallocated and should actually not be copied.
     *
     * This is indicated by skip_unallocated.
     *
     * In this case, block_copy() will query the source’s allocation status,
     * skip unallocated regions, clear them in the copy_bitmap, and invoke
     * block_copy_reset_unallocated() every time it does.
     */
    bool skip_unallocated; /* atomic */
    /* State fields that use a thread-safe API */
    BdrvDirtyBitmap *copy_bitmap;
    ProgressMeter *progress;
    SharedResource *mem;
    RateLimit rate_limit;
} BlockCopyState;
163397f4e9dSVladimir Sementsov-Ogievskiy
164d0c389d2SEmanuele Giuseppe Esposito /* Called with lock held */
block_copy_chunk_size(BlockCopyState * s)16505d5e12bSPaolo Bonzini static int64_t block_copy_chunk_size(BlockCopyState *s)
16605d5e12bSPaolo Bonzini {
16705d5e12bSPaolo Bonzini switch (s->method) {
16805d5e12bSPaolo Bonzini case COPY_READ_WRITE_CLUSTER:
16905d5e12bSPaolo Bonzini return s->cluster_size;
17005d5e12bSPaolo Bonzini case COPY_READ_WRITE:
17105d5e12bSPaolo Bonzini case COPY_RANGE_SMALL:
17205d5e12bSPaolo Bonzini return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
17305d5e12bSPaolo Bonzini s->max_transfer);
17405d5e12bSPaolo Bonzini case COPY_RANGE_FULL:
17505d5e12bSPaolo Bonzini return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
17605d5e12bSPaolo Bonzini s->max_transfer);
17705d5e12bSPaolo Bonzini default:
17805d5e12bSPaolo Bonzini /* Cannot have COPY_WRITE_ZEROES here. */
17905d5e12bSPaolo Bonzini abort();
18005d5e12bSPaolo Bonzini }
18105d5e12bSPaolo Bonzini }
18205d5e12bSPaolo Bonzini
/*
 * Search for the first dirty area in offset/bytes range and create task at
 * the beginning of it.
 *
 * Under s->lock: the found area is limited to one chunk, aligned up to the
 * cluster size, cleared from the copy bitmap (so that a concurrent caller
 * cannot pick the same region), accounted in in_flight_bytes and registered
 * as a request in s->reqs.
 *
 * Returns NULL if no dirty area is found in the given range.
 */
static coroutine_fn BlockCopyTask *
block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
                       int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;
    int64_t max_chunk;

    QEMU_LOCK_GUARD(&s->lock);
    /* call_state->max_chunk == 0 means "no per-call limit" */
    max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk);
    if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                           offset, offset + bytes,
                                           max_chunk, &offset, &bytes))
    {
        return NULL;
    }

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);

    /* region is dirty, so no existent tasks possible in it */
    assert(!reqlist_find_conflict(&s->reqs, offset, bytes));

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    s->in_flight_bytes += bytes;

    task = g_new(BlockCopyTask, 1);
    *task = (BlockCopyTask) {
        .task.func = block_copy_task_entry,
        .s = s,
        .call_state = call_state,
        .method = s->method,
    };
    reqlist_init_req(&s->reqs, &task->req, offset, bytes);

    return task;
}
223a6ffe199SVladimir Sementsov-Ogievskiy
/*
 * block_copy_task_shrink
 *
 * Drop the tail of the task to be handled later. Set dirty bits back and
 * wake up all tasks waiting for us (may be some of them are not intersecting
 * with shrunk task)
 *
 * @new_bytes must be positive and not larger than the current task size;
 * equal size is a no-op.
 */
static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                int64_t new_bytes)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    if (new_bytes == task->req.bytes) {
        return;
    }

    assert(new_bytes > 0 && new_bytes < task->req.bytes);

    /* The dropped tail is no longer in flight and must be copied later. */
    task->s->in_flight_bytes -= task->req.bytes - new_bytes;
    bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                          task->req.offset + new_bytes,
                          task->req.bytes - new_bytes);

    reqlist_shrink_req(&task->req, new_bytes);
}
2485332e5d2SVladimir Sementsov-Ogievskiy
/*
 * Finish @task: drop its in-flight accounting, re-mark the area dirty on
 * failure (so it is retried later), refresh the progress meter and remove
 * the request from s->reqs.  Does not free @task.
 */
static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    task->s->in_flight_bytes -= task->req.bytes;
    if (ret < 0) {
        /* The copy failed; the region must be copied again. */
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
                              task->req.bytes);
    }
    if (task->s->progress) {
        progress_set_remaining(task->s->progress,
                               bdrv_get_dirty_count(task->s->copy_bitmap) +
                               task->s->in_flight_bytes);
    }
    reqlist_remove_req(&task->req);
}
264a6ffe199SVladimir Sementsov-Ogievskiy
/*
 * Release a BlockCopyState and everything it owns: the rate limiter, the
 * internal copy bitmap and the shared-memory-resource accounting.
 * Passing NULL is a no-op.
 */
void block_copy_state_free(BlockCopyState *s)
{
    if (s) {
        ratelimit_destroy(&s->rate_limit);
        bdrv_release_dirty_bitmap(s->copy_bitmap);
        shres_destroy(s->mem);
        g_free(s);
    }
}
276beb5f545SVladimir Sementsov-Ogievskiy
/*
 * Combined max_transfer of @source and @target: the smaller of the two
 * non-zero limits, capped at INT_MAX (a zero limit means "unlimited").
 */
static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
{
    uint32_t src_max = source->bs->bl.max_transfer;
    uint32_t dst_max = target->bs->bl.max_transfer;

    return MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(src_max, dst_max));
}
2839d31bc53SVladimir Sementsov-Ogievskiy
/*
 * Configure write flags and the copy method of @s.
 *
 * @use_copy_range: prefer offloaded copy_range when possible
 * @compress: write compressed data to the target
 */
void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
                              bool compress)
{
    /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
    BdrvRequestFlags flags = s->write_flags & BDRV_REQ_SERIALISING;

    if (compress) {
        flags |= BDRV_REQ_WRITE_COMPRESSED;
    }
    s->write_flags = flags;

    if (compress || s->max_transfer < s->cluster_size) {
        /*
         * Compression supports only cluster-size writes and no copy-range.
         * Independently, copy_range does not respect max_transfer, and we
         * don't want to bother with requests smaller than the block-copy
         * cluster size, so in both cases fall back to buffered
         * cluster-by-cluster copying (read and write respect max_transfer
         * on their own behalf).
         */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else if (use_copy_range) {
        /*
         * If copy range enabled, start with COPY_RANGE_SMALL, until first
         * successful copy_range (look at block_copy_do_copy).
         */
        s->method = COPY_RANGE_SMALL;
    } else {
        s->method = COPY_READ_WRITE;
    }
}
310f8b9504bSVladimir Sementsov-Ogievskiy
/*
 * Choose the block-copy cluster size for @target: the larger of
 * BLOCK_COPY_CLUSTER_SIZE_DEFAULT and the target's own cluster size.
 *
 * Returns a negative errno (errp set) only when the target's cluster size
 * cannot be queried AND the target has no backing chain to COW from; a mere
 * -ENOTSUP without backing only produces a warning and falls back to the
 * default.
 */
static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
                                                 Error **errp)
{
    int ret;
    BlockDriverInfo bdi;
    bool target_does_cow;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    target_does_cow = bdrv_backing_chain_next(target);

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target_does_cow) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide "
                    "information about the block size and it doesn't have a "
                    "backing file. The default block size of %u bytes is "
                    "used. If the actual block size of the target exceeds "
                    "this default, the backup may be unusable",
                    BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target_does_cow) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        return ret;
    } else if (ret < 0 && target_does_cow) {
        /* Not fatal; just trudge on ahead. */
        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
    }

    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
352b518e9e9SVladimir Sementsov-Ogievskiy
/*
 * Create a new BlockCopyState.
 *
 * @source, @target: children provided by the caller; block-copy neither owns
 *     them nor manages their permissions (see BlockCopyState documentation).
 * @bitmap: if non-NULL, merged into the freshly created internal copy
 *     bitmap; if NULL, the whole device is marked dirty (full copy).
 *
 * Returns NULL on failure with errp set.  Free with block_copy_state_free().
 */
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     const BdrvDirtyBitmap *bitmap,
                                     Error **errp)
{
    ERRP_GUARD();
    BlockCopyState *s;
    int64_t cluster_size;
    BdrvDirtyBitmap *copy_bitmap;
    bool is_fleecing;

    GLOBAL_STATE_CODE();

    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
    if (cluster_size < 0) {
        return NULL;
    }

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);
    if (bitmap) {
        if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
            error_prepend(errp, "Failed to merge bitmap '%s' to internal "
                          "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
            bdrv_release_dirty_bitmap(copy_bitmap);
            return NULL;
        }
    } else {
        /* No user bitmap: copy everything */
        bdrv_set_dirty_bitmap(copy_bitmap, 0,
                              bdrv_dirty_bitmap_size(copy_bitmap));
    }

    /*
     * If source is in backing chain of target assume that target is going to be
     * used for "image fleecing", i.e. it should represent a kind of snapshot of
     * source at backup-start point in time. And target is going to be read by
     * somebody (for example, used as NBD export) during backup job.
     *
     * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
     * intersection of backup writes and third party reads from target,
     * otherwise reading from target we may occasionally read already updated by
     * guest data.
     *
     * For more information see commit f8d59dfb40bb and test
     * tests/qemu-iotests/222
     */
    bdrv_graph_rdlock_main_loop();
    is_fleecing = bdrv_chain_contains(target->bs, source->bs);
    bdrv_graph_rdunlock_main_loop();

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
        /* Common source/target limit, aligned down to whole clusters */
        .max_transfer = QEMU_ALIGN_DOWN(
                                    block_copy_max_transfer(source, target),
                                    cluster_size),
    };

    block_copy_set_copy_opts(s, false, false);

    ratelimit_init(&s->rate_limit);
    qemu_co_mutex_init(&s->lock);
    QLIST_INIT(&s->reqs);
    QLIST_INIT(&s->calls);

    return s;
}
429beb5f545SVladimir Sementsov-Ogievskiy
/*
 * Attach a caller-owned progress meter; block-copy updates it via
 * progress_set_remaining() in block_copy_task_end().
 *
 * Only set before running the job, no need for locking.
 */
void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}
435d0ebeca1SVladimir Sementsov-Ogievskiy
/*
 * Takes ownership of @task
 *
 * If pool is NULL directly run the task, otherwise schedule it into the pool.
 *
 * Returns: task.func return code if pool is NULL
 *          otherwise -ECANCELED if pool status is bad
 *          otherwise 0 (successfully scheduled)
 */
static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
                                            BlockCopyTask *task)
{
    if (!pool) {
        /* Synchronous path: run inline, then free the task ourselves. */
        int ret = task->task.func(&task->task);

        g_free(task);
        return ret;
    }

    aio_task_pool_wait_slot(pool);
    if (aio_task_pool_status(pool) < 0) {
        /*
         * Pool already failed: undo the task's resource accounting and end
         * the task instead of starting it.
         */
        co_put_to_shres(task->s->mem, task->req.bytes);
        block_copy_task_end(task, -ECANCELED);
        g_free(task);
        return -ECANCELED;
    }

    /* Ownership of @task passes to the pool. */
    aio_task_pool_start_task(pool, &task->task);

    return 0;
}
4674ce5dd3eSVladimir Sementsov-Ogievskiy
4684ce5dd3eSVladimir Sementsov-Ogievskiy /*
469e332a726SVladimir Sementsov-Ogievskiy * block_copy_do_copy
470e332a726SVladimir Sementsov-Ogievskiy *
471dafaf135SVladimir Sementsov-Ogievskiy * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
472dafaf135SVladimir Sementsov-Ogievskiy * s->len only to cover last cluster when s->len is not aligned to clusters.
473e332a726SVladimir Sementsov-Ogievskiy *
4743202d8e4SMichael Tokarev * No sync here: neither bitmap nor intersecting requests handling, only copy.
475e332a726SVladimir Sementsov-Ogievskiy *
47605d5e12bSPaolo Bonzini * @method is an in-out argument, so that copy_range can be either extended to
47705d5e12bSPaolo Bonzini * a full-size buffer or disabled if the copy_range attempt fails. The output
47805d5e12bSPaolo Bonzini * value of @method should be used for subsequent tasks.
479e332a726SVladimir Sementsov-Ogievskiy * Returns 0 on success.
480beb5f545SVladimir Sementsov-Ogievskiy */
static int coroutine_fn GRAPH_RDLOCK
block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
                   BlockCopyMethod *method, bool *error_is_read)
{
    int ret;
    /* Clamp to s->len: only the last cluster may extend past the image end. */
    int64_t nbytes = MIN(offset + bytes, s->len) - offset;
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    switch (*method) {
    case COPY_WRITE_ZEROES:
        /*
         * Region known to read as zeroes: emit a zero-write instead of
         * copying data.  Compression makes no sense for a zero write, so
         * mask the flag out.
         */
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            *error_is_read = false;
        }
        return ret;

    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret >= 0) {
            /* Successful copy-range, increase chunk size. */
            *method = COPY_RANGE_FULL;
            return 0;
        }

        /* copy_range failed: permanently switch this call to read+write. */
        trace_block_copy_copy_range_fail(s, offset, ret);
        *method = COPY_READ_WRITE;
        /* Fall through to read+write with allocated buffer */

    case COPY_READ_WRITE_CLUSTER:
    case COPY_READ_WRITE:
        /*
         * In case of failed copy_range request above, we may proceed with
         * buffered request larger than BLOCK_COPY_MAX_BUFFER.
         * Still, further requests will be properly limited, so don't care too
         * much. Moreover the most likely case (copy_range is unsupported for
         * the configuration, so the very first copy_range request fails)
         * is handled by setting large copy_size only after first successful
         * copy_range.
         */

        bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

        /* Read from source into the bounce buffer ... */
        ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
        if (ret < 0) {
            trace_block_copy_read_fail(s, offset, ret);
            *error_is_read = true;
            goto out;
        }

        /* ... then write it to the target. */
        ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                             s->write_flags);
        if (ret < 0) {
            trace_block_copy_write_fail(s, offset, ret);
            *error_is_read = false;
            goto out;
        }

    out:
        qemu_vfree(bounce_buffer);
        break;

    default:
        abort();
    }

    return ret;
}
560beb5f545SVladimir Sementsov-Ogievskiy
/*
 * AioTask entry point: perform the copy for one task, then publish the
 * result (method adjustment, error/progress accounting) under s->lock.
 */
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
    BlockCopyTask *t = container_of(task, BlockCopyTask, task);
    BlockCopyState *s = t->s;
    bool error_is_read = false;
    /* Local copy: block_copy_do_copy() may switch the method on failure. */
    BlockCopyMethod method = t->method;
    int ret;

    WITH_GRAPH_RDLOCK_GUARD() {
        ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
                                 &error_is_read);
    }

    WITH_QEMU_LOCK_GUARD(&s->lock) {
        /*
         * Only update the shared method if no other task changed it since
         * this task was created (compare against the method we started with).
         */
        if (s->method == t->method) {
            s->method = method;
        }

        if (ret < 0) {
            /* Record only the first failure for this call. */
            if (!t->call_state->ret) {
                t->call_state->ret = ret;
                t->call_state->error_is_read = error_is_read;
            }
        } else if (s->progress) {
            progress_work_done(s->progress, t->req.bytes);
        }
    }
    co_put_to_shres(s->mem, t->req.bytes);
    block_copy_task_end(t, ret);

    return ret;
}
5934ce5dd3eSVladimir Sementsov-Ogievskiy
5947ff9579eSKevin Wolf static coroutine_fn GRAPH_RDLOCK
block_copy_block_status(BlockCopyState * s,int64_t offset,int64_t bytes,int64_t * pnum)5957ff9579eSKevin Wolf int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
5967ff9579eSKevin Wolf int64_t *pnum)
5972d57511aSVladimir Sementsov-Ogievskiy {
5982d57511aSVladimir Sementsov-Ogievskiy int64_t num;
5992d57511aSVladimir Sementsov-Ogievskiy BlockDriverState *base;
6002d57511aSVladimir Sementsov-Ogievskiy int ret;
6012d57511aSVladimir Sementsov-Ogievskiy
602d0c389d2SEmanuele Giuseppe Esposito if (qatomic_read(&s->skip_unallocated)) {
603c6f6d846SMax Reitz base = bdrv_backing_chain_next(s->source->bs);
6042d57511aSVladimir Sementsov-Ogievskiy } else {
6052d57511aSVladimir Sementsov-Ogievskiy base = NULL;
6062d57511aSVladimir Sementsov-Ogievskiy }
6072d57511aSVladimir Sementsov-Ogievskiy
60843a0d4f0SEmanuele Giuseppe Esposito ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
6092d57511aSVladimir Sementsov-Ogievskiy NULL, NULL);
6102d57511aSVladimir Sementsov-Ogievskiy if (ret < 0 || num < s->cluster_size) {
6112d57511aSVladimir Sementsov-Ogievskiy /*
6122d57511aSVladimir Sementsov-Ogievskiy * On error or if failed to obtain large enough chunk just fallback to
6132d57511aSVladimir Sementsov-Ogievskiy * copy one cluster.
6142d57511aSVladimir Sementsov-Ogievskiy */
6152d57511aSVladimir Sementsov-Ogievskiy num = s->cluster_size;
6162d57511aSVladimir Sementsov-Ogievskiy ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
6172d57511aSVladimir Sementsov-Ogievskiy } else if (offset + num == s->len) {
6182d57511aSVladimir Sementsov-Ogievskiy num = QEMU_ALIGN_UP(num, s->cluster_size);
6192d57511aSVladimir Sementsov-Ogievskiy } else {
6202d57511aSVladimir Sementsov-Ogievskiy num = QEMU_ALIGN_DOWN(num, s->cluster_size);
6212d57511aSVladimir Sementsov-Ogievskiy }
6222d57511aSVladimir Sementsov-Ogievskiy
6232d57511aSVladimir Sementsov-Ogievskiy *pnum = num;
6242d57511aSVladimir Sementsov-Ogievskiy return ret;
6252d57511aSVladimir Sementsov-Ogievskiy }
6262d57511aSVladimir Sementsov-Ogievskiy
627beb5f545SVladimir Sementsov-Ogievskiy /*
628beb5f545SVladimir Sementsov-Ogievskiy * Check if the cluster starting at offset is allocated or not.
629beb5f545SVladimir Sementsov-Ogievskiy * return via pnum the number of contiguous clusters sharing this allocation.
630beb5f545SVladimir Sementsov-Ogievskiy */
6317ff9579eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
block_copy_is_cluster_allocated(BlockCopyState * s,int64_t offset,int64_t * pnum)6327ff9579eSKevin Wolf block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
633beb5f545SVladimir Sementsov-Ogievskiy int64_t *pnum)
634beb5f545SVladimir Sementsov-Ogievskiy {
63500e30f05SVladimir Sementsov-Ogievskiy BlockDriverState *bs = s->source->bs;
636beb5f545SVladimir Sementsov-Ogievskiy int64_t count, total_count = 0;
637beb5f545SVladimir Sementsov-Ogievskiy int64_t bytes = s->len - offset;
638beb5f545SVladimir Sementsov-Ogievskiy int ret;
639beb5f545SVladimir Sementsov-Ogievskiy
640beb5f545SVladimir Sementsov-Ogievskiy assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
641beb5f545SVladimir Sementsov-Ogievskiy
642beb5f545SVladimir Sementsov-Ogievskiy while (true) {
6437ff9579eSKevin Wolf /* protected in backup_run() */
64443a0d4f0SEmanuele Giuseppe Esposito ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
645beb5f545SVladimir Sementsov-Ogievskiy if (ret < 0) {
646beb5f545SVladimir Sementsov-Ogievskiy return ret;
647beb5f545SVladimir Sementsov-Ogievskiy }
648beb5f545SVladimir Sementsov-Ogievskiy
649beb5f545SVladimir Sementsov-Ogievskiy total_count += count;
650beb5f545SVladimir Sementsov-Ogievskiy
651beb5f545SVladimir Sementsov-Ogievskiy if (ret || count == 0) {
652beb5f545SVladimir Sementsov-Ogievskiy /*
653beb5f545SVladimir Sementsov-Ogievskiy * ret: partial segment(s) are considered allocated.
654beb5f545SVladimir Sementsov-Ogievskiy * otherwise: unallocated tail is treated as an entire segment.
655beb5f545SVladimir Sementsov-Ogievskiy */
656beb5f545SVladimir Sementsov-Ogievskiy *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
657beb5f545SVladimir Sementsov-Ogievskiy return ret;
658beb5f545SVladimir Sementsov-Ogievskiy }
659beb5f545SVladimir Sementsov-Ogievskiy
660beb5f545SVladimir Sementsov-Ogievskiy /* Unallocated segment(s) with uncertain following segment(s) */
661beb5f545SVladimir Sementsov-Ogievskiy if (total_count >= s->cluster_size) {
662beb5f545SVladimir Sementsov-Ogievskiy *pnum = total_count / s->cluster_size;
663beb5f545SVladimir Sementsov-Ogievskiy return 0;
664beb5f545SVladimir Sementsov-Ogievskiy }
665beb5f545SVladimir Sementsov-Ogievskiy
666beb5f545SVladimir Sementsov-Ogievskiy offset += count;
667beb5f545SVladimir Sementsov-Ogievskiy bytes -= count;
668beb5f545SVladimir Sementsov-Ogievskiy }
669beb5f545SVladimir Sementsov-Ogievskiy }
670beb5f545SVladimir Sementsov-Ogievskiy
/*
 * Clear [offset, offset + bytes) in the copy bitmap and, if progress
 * reporting is enabled, refresh the remaining-work estimate accordingly.
 */
void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
{
    QEMU_LOCK_GUARD(&s->lock);

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    if (!s->progress) {
        return;
    }
    progress_set_remaining(s->progress,
                           bdrv_get_dirty_count(s->copy_bitmap) +
                           s->in_flight_bytes);
}
682177541e6SVladimir Sementsov-Ogievskiy
683beb5f545SVladimir Sementsov-Ogievskiy /*
684beb5f545SVladimir Sementsov-Ogievskiy * Reset bits in copy_bitmap starting at offset if they represent unallocated
685beb5f545SVladimir Sementsov-Ogievskiy * data in the image. May reset subsequent contiguous bits.
686beb5f545SVladimir Sementsov-Ogievskiy * @return 0 when the cluster at @offset was unallocated,
687beb5f545SVladimir Sementsov-Ogievskiy * 1 otherwise, and -ret on error.
688beb5f545SVladimir Sementsov-Ogievskiy */
block_copy_reset_unallocated(BlockCopyState * s,int64_t offset,int64_t * count)68943a0d4f0SEmanuele Giuseppe Esposito int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
69043a0d4f0SEmanuele Giuseppe Esposito int64_t offset,
69143a0d4f0SEmanuele Giuseppe Esposito int64_t *count)
692beb5f545SVladimir Sementsov-Ogievskiy {
693beb5f545SVladimir Sementsov-Ogievskiy int ret;
694beb5f545SVladimir Sementsov-Ogievskiy int64_t clusters, bytes;
695beb5f545SVladimir Sementsov-Ogievskiy
696beb5f545SVladimir Sementsov-Ogievskiy ret = block_copy_is_cluster_allocated(s, offset, &clusters);
697beb5f545SVladimir Sementsov-Ogievskiy if (ret < 0) {
698beb5f545SVladimir Sementsov-Ogievskiy return ret;
699beb5f545SVladimir Sementsov-Ogievskiy }
700beb5f545SVladimir Sementsov-Ogievskiy
701beb5f545SVladimir Sementsov-Ogievskiy bytes = clusters * s->cluster_size;
702beb5f545SVladimir Sementsov-Ogievskiy
703beb5f545SVladimir Sementsov-Ogievskiy if (!ret) {
704177541e6SVladimir Sementsov-Ogievskiy block_copy_reset(s, offset, bytes);
705beb5f545SVladimir Sementsov-Ogievskiy }
706beb5f545SVladimir Sementsov-Ogievskiy
707beb5f545SVladimir Sementsov-Ogievskiy *count = bytes;
708beb5f545SVladimir Sementsov-Ogievskiy return ret;
709beb5f545SVladimir Sementsov-Ogievskiy }
710beb5f545SVladimir Sementsov-Ogievskiy
7115332e5d2SVladimir Sementsov-Ogievskiy /*
7125332e5d2SVladimir Sementsov-Ogievskiy * block_copy_dirty_clusters
7135332e5d2SVladimir Sementsov-Ogievskiy *
7145332e5d2SVladimir Sementsov-Ogievskiy * Copy dirty clusters in @offset/@bytes range.
7155332e5d2SVladimir Sementsov-Ogievskiy * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
7165332e5d2SVladimir Sementsov-Ogievskiy * clusters found and -errno on failure.
7175332e5d2SVladimir Sementsov-Ogievskiy */
static int coroutine_fn GRAPH_RDLOCK
block_copy_dirty_clusters(BlockCopyCallState *call_state)
{
    BlockCopyState *s = call_state->s;
    int64_t offset = call_state->offset;
    int64_t bytes = call_state->bytes;

    int ret = 0;
    bool found_dirty = false;
    int64_t end = offset + bytes;
    AioTaskPool *aio = NULL;    /* created lazily on first parallel task */

    /*
     * block_copy() user is responsible for keeping source and target in same
     * aio context
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));

    while (bytes && aio_task_pool_status(aio) == 0 &&
           !qatomic_read(&call_state->cancelled)) {
        BlockCopyTask *task;
        int64_t status_bytes;

        task = block_copy_task_create(s, call_state, offset, bytes);
        if (!task) {
            /* No more dirty bits in the bitmap */
            trace_block_copy_skip_range(s, offset, bytes);
            break;
        }
        if (task->req.offset > offset) {
            /* Clean region before the task's start was skipped. */
            trace_block_copy_skip_range(s, offset, task->req.offset - offset);
        }

        found_dirty = true;

        ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
                                      &status_bytes);
        assert(ret >= 0); /* never fail */
        if (status_bytes < task->req.bytes) {
            block_copy_task_shrink(task, status_bytes);
        }
        if (qatomic_read(&s->skip_unallocated) &&
            !(ret & BDRV_BLOCK_ALLOCATED)) {
            /* Unallocated region with skip_unallocated set: drop the task. */
            block_copy_task_end(task, 0);
            trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
            offset = task_end(task);
            bytes = end - offset;
            g_free(task);
            continue;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            /* Region reads as zeroes: write zeroes instead of copying. */
            task->method = COPY_WRITE_ZEROES;
        }

        if (!call_state->ignore_ratelimit) {
            uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
            if (ns > 0) {
                /* Over the rate limit: release the task, sleep, retry. */
                block_copy_task_end(task, -EAGAIN);
                g_free(task);
                qemu_co_sleep_ns_wakeable(&call_state->sleep,
                                          QEMU_CLOCK_REALTIME, ns);
                continue;
            }
        }

        /* Account this task's bytes against the rate limit. */
        ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);

        trace_block_copy_process(s, task->req.offset);

        co_get_from_shres(s->mem, task->req.bytes);

        offset = task_end(task);
        bytes = end - offset;

        if (!aio && bytes) {
            aio = aio_task_pool_new(call_state->max_workers);
        }

        ret = block_copy_task_run(aio, task);
        if (ret < 0) {
            goto out;
        }
    }

out:
    if (aio) {
        aio_task_pool_wait_all(aio);

        /*
         * We are not really interested in -ECANCELED returned from
         * block_copy_task_run. If it fails, it means some task already failed
         * for real reason, let's return first failure.
         * Still, assert that we don't rewrite failure by success.
         *
         * Note: ret may be positive here because of block-status result.
         */
        assert(ret >= 0 || aio_task_pool_status(aio) < 0);
        ret = aio_task_pool_status(aio);

        aio_task_pool_free(aio);
    }

    return ret < 0 ? ret : found_dirty;
}
8265332e5d2SVladimir Sementsov-Ogievskiy
/* Wake up a rate-limit sleep of @call_state's coroutine, if it is sleeping. */
void block_copy_kick(BlockCopyCallState *call_state)
{
    qemu_co_sleep_wake(&call_state->sleep);
}
8317e032df0SVladimir Sementsov-Ogievskiy
8325332e5d2SVladimir Sementsov-Ogievskiy /*
8333b8c2329SVladimir Sementsov-Ogievskiy * block_copy_common
8345332e5d2SVladimir Sementsov-Ogievskiy *
 * Copy requested region, according to the dirty bitmap.
8365332e5d2SVladimir Sementsov-Ogievskiy * Collaborate with parallel block_copy requests: if they succeed it will help
8375332e5d2SVladimir Sementsov-Ogievskiy * us. If they fail, we will retry not-copied regions. So, if we return error,
8385332e5d2SVladimir Sementsov-Ogievskiy * it means that some I/O operation failed in context of _this_ block_copy call,
8395332e5d2SVladimir Sementsov-Ogievskiy * not some parallel operation.
8405332e5d2SVladimir Sementsov-Ogievskiy */
static int coroutine_fn GRAPH_RDLOCK
block_copy_common(BlockCopyCallState *call_state)
{
    int ret;
    BlockCopyState *s = call_state->s;

    /* Register this call in s->calls so concurrent users can find it. */
    qemu_co_mutex_lock(&s->lock);
    QLIST_INSERT_HEAD(&s->calls, call_state, list);
    qemu_co_mutex_unlock(&s->lock);

    do {
        ret = block_copy_dirty_clusters(call_state);

        if (ret == 0 && !qatomic_read(&call_state->cancelled)) {
            WITH_QEMU_LOCK_GUARD(&s->lock) {
                /*
                 * Check that there is no task we still need to
                 * wait to complete
                 */
                ret = reqlist_wait_one(&s->reqs, call_state->offset,
                                       call_state->bytes, &s->lock);
                if (ret == 0) {
                    /*
                     * No pending tasks, but check again the bitmap in this
                     * same critical section, since a task might have failed
                     * between this and the critical section in
                     * block_copy_dirty_clusters().
                     *
                     * reqlist_wait_one return value 0 also means that it
                     * didn't release the lock. So, we are still in the same
                     * critical section, not interrupted by any concurrent
                     * access to state.
                     */
                    ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap,
                                                       call_state->offset,
                                                       call_state->bytes) >= 0;
                }
            }
        }

        /*
         * We retry in two cases:
         * 1. Some progress done
         *    Something was copied, which means that there were yield points
         *    and some new dirty bits may have appeared (due to failed parallel
         *    block-copy requests).
         * 2. We have waited for some intersecting block-copy request
         *    It may have failed and produced new dirty bits.
         */
    } while (ret > 0 && !qatomic_read(&call_state->cancelled));

    /* Release store: pairs with the acquire loads in the status getters. */
    qatomic_store_release(&call_state->finished, true);

    if (call_state->cb) {
        call_state->cb(call_state->cb_opaque);
    }

    /* Unregister the call before returning. */
    qemu_co_mutex_lock(&s->lock);
    QLIST_REMOVE(call_state, list);
    qemu_co_mutex_unlock(&s->lock);

    return ret;
}
904397f4e9dSVladimir Sementsov-Ogievskiy
/*
 * Coroutine entry point shared by block_copy() (via qemu_co_timeout()) and
 * block_copy_async(); @opaque is the BlockCopyCallState.
 */
static void coroutine_fn block_copy_async_co_entry(void *opaque)
{
    GRAPH_RDLOCK_GUARD();
    block_copy_common(opaque);
}
91015df6e69SVladimir Sementsov-Ogievskiy
block_copy(BlockCopyState * s,int64_t start,int64_t bytes,bool ignore_ratelimit,uint64_t timeout_ns,BlockCopyAsyncCallbackFunc cb,void * cb_opaque)91115df6e69SVladimir Sementsov-Ogievskiy int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
91215df6e69SVladimir Sementsov-Ogievskiy bool ignore_ratelimit, uint64_t timeout_ns,
91315df6e69SVladimir Sementsov-Ogievskiy BlockCopyAsyncCallbackFunc cb,
91415df6e69SVladimir Sementsov-Ogievskiy void *cb_opaque)
91515df6e69SVladimir Sementsov-Ogievskiy {
91615df6e69SVladimir Sementsov-Ogievskiy int ret;
91715df6e69SVladimir Sementsov-Ogievskiy BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
91815df6e69SVladimir Sementsov-Ogievskiy
91915df6e69SVladimir Sementsov-Ogievskiy *call_state = (BlockCopyCallState) {
9203b8c2329SVladimir Sementsov-Ogievskiy .s = s,
9213b8c2329SVladimir Sementsov-Ogievskiy .offset = start,
9223b8c2329SVladimir Sementsov-Ogievskiy .bytes = bytes,
9237e032df0SVladimir Sementsov-Ogievskiy .ignore_ratelimit = ignore_ratelimit,
92426be9d62SVladimir Sementsov-Ogievskiy .max_workers = BLOCK_COPY_MAX_WORKERS,
92515df6e69SVladimir Sementsov-Ogievskiy .cb = cb,
92615df6e69SVladimir Sementsov-Ogievskiy .cb_opaque = cb_opaque,
9273b8c2329SVladimir Sementsov-Ogievskiy };
9283b8c2329SVladimir Sementsov-Ogievskiy
92915df6e69SVladimir Sementsov-Ogievskiy ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns,
93015df6e69SVladimir Sementsov-Ogievskiy g_free);
93115df6e69SVladimir Sementsov-Ogievskiy if (ret < 0) {
93215df6e69SVladimir Sementsov-Ogievskiy assert(ret == -ETIMEDOUT);
93315df6e69SVladimir Sementsov-Ogievskiy block_copy_call_cancel(call_state);
93415df6e69SVladimir Sementsov-Ogievskiy /* call_state will be freed by running coroutine. */
93515df6e69SVladimir Sementsov-Ogievskiy return ret;
9363b8c2329SVladimir Sementsov-Ogievskiy }
9373b8c2329SVladimir Sementsov-Ogievskiy
93815df6e69SVladimir Sementsov-Ogievskiy ret = call_state->ret;
93915df6e69SVladimir Sementsov-Ogievskiy g_free(call_state);
94015df6e69SVladimir Sementsov-Ogievskiy
94115df6e69SVladimir Sementsov-Ogievskiy return ret;
942de4641b4SVladimir Sementsov-Ogievskiy }
943de4641b4SVladimir Sementsov-Ogievskiy
block_copy_async(BlockCopyState * s,int64_t offset,int64_t bytes,int max_workers,int64_t max_chunk,BlockCopyAsyncCallbackFunc cb,void * cb_opaque)944de4641b4SVladimir Sementsov-Ogievskiy BlockCopyCallState *block_copy_async(BlockCopyState *s,
945de4641b4SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes,
94626be9d62SVladimir Sementsov-Ogievskiy int max_workers, int64_t max_chunk,
947de4641b4SVladimir Sementsov-Ogievskiy BlockCopyAsyncCallbackFunc cb,
948de4641b4SVladimir Sementsov-Ogievskiy void *cb_opaque)
949de4641b4SVladimir Sementsov-Ogievskiy {
950de4641b4SVladimir Sementsov-Ogievskiy BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
951de4641b4SVladimir Sementsov-Ogievskiy
952de4641b4SVladimir Sementsov-Ogievskiy *call_state = (BlockCopyCallState) {
953de4641b4SVladimir Sementsov-Ogievskiy .s = s,
954de4641b4SVladimir Sementsov-Ogievskiy .offset = offset,
955de4641b4SVladimir Sementsov-Ogievskiy .bytes = bytes,
95626be9d62SVladimir Sementsov-Ogievskiy .max_workers = max_workers,
95726be9d62SVladimir Sementsov-Ogievskiy .max_chunk = max_chunk,
958de4641b4SVladimir Sementsov-Ogievskiy .cb = cb,
959de4641b4SVladimir Sementsov-Ogievskiy .cb_opaque = cb_opaque,
960de4641b4SVladimir Sementsov-Ogievskiy
961de4641b4SVladimir Sementsov-Ogievskiy .co = qemu_coroutine_create(block_copy_async_co_entry, call_state),
962de4641b4SVladimir Sementsov-Ogievskiy };
963de4641b4SVladimir Sementsov-Ogievskiy
964de4641b4SVladimir Sementsov-Ogievskiy qemu_coroutine_enter(call_state->co);
965de4641b4SVladimir Sementsov-Ogievskiy
966de4641b4SVladimir Sementsov-Ogievskiy return call_state;
967de4641b4SVladimir Sementsov-Ogievskiy }
968de4641b4SVladimir Sementsov-Ogievskiy
/*
 * Release a call state obtained from block_copy_async().
 *
 * Accepts NULL as a no-op.  The operation must have finished already;
 * freeing a still-running call state would leave the coroutine with a
 * dangling pointer.
 */
void block_copy_call_free(BlockCopyCallState *call_state)
{
    if (call_state) {
        assert(qatomic_read(&call_state->finished));
        g_free(call_state);
    }
}
978de4641b4SVladimir Sementsov-Ogievskiy
/* Return true once the asynchronous operation has completed (any outcome). */
bool block_copy_call_finished(BlockCopyCallState *call_state)
{
    return qatomic_read(&call_state->finished);
}
983de4641b4SVladimir Sementsov-Ogievskiy
block_copy_call_succeeded(BlockCopyCallState * call_state)984de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_succeeded(BlockCopyCallState *call_state)
985de4641b4SVladimir Sementsov-Ogievskiy {
986149009beSEmanuele Giuseppe Esposito return qatomic_load_acquire(&call_state->finished) &&
987149009beSEmanuele Giuseppe Esposito !qatomic_read(&call_state->cancelled) &&
988a6d23d56SVladimir Sementsov-Ogievskiy call_state->ret == 0;
989de4641b4SVladimir Sementsov-Ogievskiy }
990de4641b4SVladimir Sementsov-Ogievskiy
block_copy_call_failed(BlockCopyCallState * call_state)991de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_failed(BlockCopyCallState *call_state)
992de4641b4SVladimir Sementsov-Ogievskiy {
993149009beSEmanuele Giuseppe Esposito return qatomic_load_acquire(&call_state->finished) &&
994149009beSEmanuele Giuseppe Esposito !qatomic_read(&call_state->cancelled) &&
995a6d23d56SVladimir Sementsov-Ogievskiy call_state->ret < 0;
996a6d23d56SVladimir Sementsov-Ogievskiy }
997a6d23d56SVladimir Sementsov-Ogievskiy
/* Return true if cancellation was requested via block_copy_call_cancel(). */
bool block_copy_call_cancelled(BlockCopyCallState *call_state)
{
    return qatomic_read(&call_state->cancelled);
}
1002de4641b4SVladimir Sementsov-Ogievskiy
/*
 * Return the final status of a finished operation.
 *
 * Must only be called after the operation has finished; the acquire load
 * in the assertion also orders the subsequent plain reads of
 * ->error_is_read and ->ret against the completing coroutine's writes.
 *
 * If @error_is_read is non-NULL, it is set to whether a failure occurred
 * on the read side (as opposed to the write side).
 */
int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
{
    assert(qatomic_load_acquire(&call_state->finished));
    if (error_is_read) {
        *error_is_read = call_state->error_is_read;
    }
    return call_state->ret;
}
1011de4641b4SVladimir Sementsov-Ogievskiy
1012149009beSEmanuele Giuseppe Esposito /*
1013149009beSEmanuele Giuseppe Esposito * Note that cancelling and finishing are racy.
1014149009beSEmanuele Giuseppe Esposito * User can cancel a block-copy that is already finished.
1015149009beSEmanuele Giuseppe Esposito */
block_copy_call_cancel(BlockCopyCallState * call_state)1016a6d23d56SVladimir Sementsov-Ogievskiy void block_copy_call_cancel(BlockCopyCallState *call_state)
1017a6d23d56SVladimir Sementsov-Ogievskiy {
1018149009beSEmanuele Giuseppe Esposito qatomic_set(&call_state->cancelled, true);
1019a6d23d56SVladimir Sementsov-Ogievskiy block_copy_kick(call_state);
1020a6d23d56SVladimir Sementsov-Ogievskiy }
1021a6d23d56SVladimir Sementsov-Ogievskiy
/* Return the dirty bitmap tracking the regions still to be copied. */
BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
{
    return s->copy_bitmap;
}
1026397f4e9dSVladimir Sementsov-Ogievskiy
/* Return the cluster size (in bytes) that copy granularity is aligned to. */
int64_t block_copy_cluster_size(BlockCopyState *s)
{
    return s->cluster_size;
}
1031b518e9e9SVladimir Sementsov-Ogievskiy
/*
 * Control whether unallocated source clusters are skipped instead of
 * copied.  Atomic store: may be toggled concurrently with running copies.
 */
void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
{
    qatomic_set(&s->skip_unallocated, skip);
}
10367e032df0SVladimir Sementsov-Ogievskiy
/*
 * Update the rate limit applied to this block-copy state.
 * @speed is in bytes per second; 0 presumably means unlimited — confirm
 * against ratelimit_set_speed() semantics.
 */
void block_copy_set_speed(BlockCopyState *s, uint64_t speed)
{
    ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);

    /*
     * Note: it's good to kick all call states from here, but it should be done
     * only from a coroutine, to not crash if s->calls list changed while
     * entering one call. So for now, the only user of this function kicks its
     * only one call_state by hand.
     */
}
1048