/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/coroutine.h"
#include "qemu/range.h"
#include "trace.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/dirty-bitmap.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
#include "qemu/memalign.h"

#define MAX_IN_FLIGHT 16
#define MAX_IO_BYTES (1 << 20) /* 1 MiB */
#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)
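
/*
 * With the defaults above, the background copy path keeps at most 16
 * requests of at most 1 MiB each in flight, so the default buffer works
 * out to 16 * (1 << 20) = 16 MiB. mirror_free_init() below carves this
 * buffer into granularity-sized chunks (e.g. 256 chunks at a common
 * 64 KiB granularity).
 */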

/* The mirroring buffer is a list of granularity-sized chunks.
 * Free chunks are organized in a list.
 */
typedef struct MirrorBuffer {
    QSIMPLEQ_ENTRY(MirrorBuffer) next;
} MirrorBuffer;

typedef struct MirrorOp MirrorOp;

typedef struct MirrorBlockJob {
    BlockJob common;
    BlockBackend *target;
    BlockDriverState *mirror_top_bs;
    BlockDriverState *base;
    BlockDriverState *base_overlay;

    /* The name of the graph node to replace */
    char *replaces;
    /* The BDS to replace */
    BlockDriverState *to_replace;
    /* Used to block operations on the drive-mirror-replace target */
    Error *replace_blocker;
    bool is_none_mode;
    BlockMirrorBackingMode backing_mode;
    /* Whether the target image requires explicit zero-initialization */
    bool zero_target;
    /*
     * To be accessed with atomics. Written only under the BQL (required by the
     * current implementation of mirror_change()).
     */
    MirrorCopyMode copy_mode;
    BlockdevOnError on_source_error, on_target_error;
    /* Set when the target is synced (dirty bitmap is clean, nothing
     * in flight) and the job is running in active mode */
    bool actively_synced;
    bool should_complete;
    int64_t granularity;
    size_t buf_size;
    int64_t bdev_length;
    unsigned long *cow_bitmap;
    BdrvDirtyBitmap *dirty_bitmap;
    BdrvDirtyBitmapIter *dbi;
    uint8_t *buf;
    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
    int buf_free_count;

    uint64_t last_pause_ns;
    unsigned long *in_flight_bitmap;
    unsigned in_flight;
    int64_t bytes_in_flight;
    QTAILQ_HEAD(, MirrorOp) ops_in_flight;
    int ret;
    bool unmap;
    int target_cluster_size;
    int max_iov;
    bool initial_zeroing_ongoing;
    int in_active_write_counter;
    int64_t active_write_bytes_in_flight;
    bool prepared;
    bool in_drain;
} MirrorBlockJob;

typedef struct MirrorBDSOpaque {
    MirrorBlockJob *job;
    bool stop;
    bool is_commit;
} MirrorBDSOpaque;

struct MirrorOp {
    MirrorBlockJob *s;
    QEMUIOVector qiov;
    int64_t offset;
    uint64_t bytes;

    /* The pointee is set by mirror_co_read(), mirror_co_zero(), and
     * mirror_co_discard() before yielding for the first time */
    int64_t *bytes_handled;

    bool is_pseudo_op;
    bool is_active_write;
    bool is_in_flight;
    CoQueue waiting_requests;
    Coroutine *co;
    MirrorOp *waiting_for_op;

    QTAILQ_ENTRY(MirrorOp) next;
};

typedef enum MirrorMethod {
    MIRROR_METHOD_COPY,
    MIRROR_METHOD_ZERO,
    MIRROR_METHOD_DISCARD,
} MirrorMethod;

static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
{
    s->actively_synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->on_source_error,
                                      true, error);
    } else {
        return block_job_error_action(&s->common, s->on_target_error,
                                      false, error);
    }
}

static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
                                                  MirrorBlockJob *s,
                                                  uint64_t offset,
                                                  uint64_t bytes)
{
    uint64_t self_start_chunk = offset / s->granularity;
    uint64_t self_end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);
    uint64_t self_nb_chunks = self_end_chunk - self_start_chunk;

    while (find_next_bit(s->in_flight_bitmap, self_end_chunk,
                         self_start_chunk) < self_end_chunk &&
           s->ret >= 0)
    {
        MirrorOp *op;

        QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
            uint64_t op_start_chunk = op->offset / s->granularity;
            uint64_t op_nb_chunks = DIV_ROUND_UP(op->offset + op->bytes,
                                                 s->granularity) -
                                    op_start_chunk;

            if (op == self) {
                continue;
            }

            if (ranges_overlap(self_start_chunk, self_nb_chunks,
                               op_start_chunk, op_nb_chunks))
            {
                if (self) {
                    /*
                     * If the operation is already (indirectly) waiting for us,
                     * or will wait for us as soon as it wakes up, then just go
                     * on (instead of producing a deadlock in the former case).
                     */
                    if (op->waiting_for_op) {
                        continue;
                    }

                    self->waiting_for_op = op;
                }

                qemu_co_queue_wait(&op->waiting_requests, NULL);

                if (self) {
                    self->waiting_for_op = NULL;
                }

                break;
            }
        }
    }
}
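
/*
 * Illustration of the chunk arithmetic above, assuming a 64 KiB
 * granularity: a request with offset == 96 KiB and bytes == 160 KiB ends
 * at 256 KiB, so its start chunk is 96K / 64K == 1 and its end chunk is
 * DIV_ROUND_UP(256K, 64K) == 4. It therefore conflicts with any
 * in-flight operation touching chunks 1, 2 or 3.
 */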

static void coroutine_fn mirror_iteration_done(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;
    struct iovec *iov;
    int64_t chunk_num;
    int i, nb_chunks;

    trace_mirror_iteration_done(s, op->offset, op->bytes, ret);

    s->in_flight--;
    s->bytes_in_flight -= op->bytes;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
        s->buf_free_count++;
    }

    chunk_num = op->offset / s->granularity;
    nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);

    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
    QTAILQ_REMOVE(&s->ops_in_flight, op, next);
    if (ret >= 0) {
        if (s->cow_bitmap) {
            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
        }
        if (!s->initial_zeroing_ongoing) {
            job_progress_update(&s->common.job, op->bytes);
        }
    }
    qemu_iovec_destroy(&op->qiov);

    qemu_co_queue_restart_all(&op->waiting_requests);
    g_free(op);
}

static void coroutine_fn mirror_write_complete(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;

    if (ret < 0) {
        BlockErrorAction action;

        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
        action = mirror_error_action(s, false, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }
    }

    mirror_iteration_done(op, ret);
}

static void coroutine_fn mirror_read_complete(MirrorOp *op, int ret)
{
    MirrorBlockJob *s = op->s;

    if (ret < 0) {
        BlockErrorAction action;

        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
        action = mirror_error_action(s, true, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
        }

        mirror_iteration_done(op, ret);
        return;
    }

    ret = blk_co_pwritev(s->target, op->offset, op->qiov.size, &op->qiov, 0);
    mirror_write_complete(op, ret);
}

/* Clip bytes relative to offset to not exceed end-of-file */
static inline int64_t mirror_clip_bytes(MirrorBlockJob *s,
                                        int64_t offset,
                                        int64_t bytes)
{
    return MIN(bytes, s->bdev_length - offset);
}

/* Round offset and/or bytes to target cluster if COW is needed, and
 * return the offset of the adjusted tail against original. */
static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
                                         uint64_t *bytes)
{
    bool need_cow;
    int ret = 0;
    int64_t align_offset = *offset;
    int64_t align_bytes = *bytes;
    int max_bytes = s->granularity * s->max_iov;

    need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
    need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
                          s->cow_bitmap);
    if (need_cow) {
        bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
                                  &align_offset, &align_bytes);
    }

    if (align_bytes > max_bytes) {
        align_bytes = max_bytes;
        if (need_cow) {
            align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size);
        }
    }
    /* Clipping may result in align_bytes unaligned to chunk boundary, but
     * that doesn't matter because it's already the end of source image. */
    align_bytes = mirror_clip_bytes(s, align_offset, align_bytes);

    ret = align_offset + align_bytes - (*offset + *bytes);
    *offset = align_offset;
    *bytes = align_bytes;
    assert(ret >= 0);
    return ret;
}
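
/*
 * Worked example for mirror_cow_align(), assuming a 64 KiB granularity
 * and a 128 KiB target cluster size: a copy of [64 KiB, 128 KiB) whose
 * surrounding chunks have not been copied yet is widened to the whole
 * cluster [0, 128 KiB), so the target never has to COW a partially
 * written cluster. The return value (here 0, since the tail did not
 * move) tells the caller how many bytes past the requested range were
 * handled as well.
 */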

static inline void coroutine_fn
mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
{
    MirrorOp *op;

    QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
        /*
         * Do not wait on pseudo ops, because it may in turn wait on
         * some other operation to start, which may in fact be the
         * caller of this function. Since there is only one pseudo op
         * at any given time, we will always find some real operation
         * to wait on.
         * Also, do not wait on active operations, because they do not
         * use up in-flight slots.
         */
        if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) {
            qemu_co_queue_wait(&op->waiting_requests, NULL);
            return;
        }
    }
    abort();
}

/* Perform a mirror copy operation.
 *
 * *op->bytes_handled is set to the number of bytes copied after and
 * including offset, excluding any bytes copied prior to offset due
 * to alignment. This will be op->bytes if no alignment is necessary,
 * or (new_end - op->offset) if the tail is rounded up or down due to
 * alignment or buffer limit.
 */
static void coroutine_fn mirror_co_read(void *opaque)
{
    MirrorOp *op = opaque;
    MirrorBlockJob *s = op->s;
    int nb_chunks;
    uint64_t ret;
    uint64_t max_bytes;

    max_bytes = s->granularity * s->max_iov;

    /* We can only handle as much as buf_size at a time. */
    op->bytes = MIN(s->buf_size, MIN(max_bytes, op->bytes));
    assert(op->bytes);
    assert(op->bytes < BDRV_REQUEST_MAX_BYTES);
    *op->bytes_handled = op->bytes;

    if (s->cow_bitmap) {
        *op->bytes_handled += mirror_cow_align(s, &op->offset, &op->bytes);
    }
    /* Cannot exceed BDRV_REQUEST_MAX_BYTES + INT_MAX */
    assert(*op->bytes_handled <= UINT_MAX);
    assert(op->bytes <= s->buf_size);
    /* The offset is granularity-aligned because:
     * 1) Caller passes in aligned values;
     * 2) mirror_cow_align is used only when target cluster is larger. */
    assert(QEMU_IS_ALIGNED(op->offset, s->granularity));
    /* The range is sector-aligned, since bdrv_getlength() rounds up. */
    assert(QEMU_IS_ALIGNED(op->bytes, BDRV_SECTOR_SIZE));
    nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);

    while (s->buf_free_count < nb_chunks) {
        trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
        mirror_wait_for_free_in_flight_slot(s);
    }

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = op->bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
    }

    /* Copy the dirty cluster. */
    s->in_flight++;
    s->bytes_in_flight += op->bytes;
    op->is_in_flight = true;
    trace_mirror_one_iteration(s, op->offset, op->bytes);

    WITH_GRAPH_RDLOCK_GUARD() {
        ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
                             &op->qiov, 0);
    }
    mirror_read_complete(op, ret);
}

static void coroutine_fn mirror_co_zero(void *opaque)
{
    MirrorOp *op = opaque;
    int ret;

    op->s->in_flight++;
    op->s->bytes_in_flight += op->bytes;
    *op->bytes_handled = op->bytes;
    op->is_in_flight = true;

    ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
                               op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
    mirror_write_complete(op, ret);
}

static void coroutine_fn mirror_co_discard(void *opaque)
{
    MirrorOp *op = opaque;
    int ret;

    op->s->in_flight++;
    op->s->bytes_in_flight += op->bytes;
    *op->bytes_handled = op->bytes;
    op->is_in_flight = true;

    ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes);
    mirror_write_complete(op, ret);
}

static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
                               unsigned bytes, MirrorMethod mirror_method)
{
    MirrorOp *op;
    Coroutine *co;
    int64_t bytes_handled = -1;

    op = g_new(MirrorOp, 1);
    *op = (MirrorOp){
        .s              = s,
        .offset         = offset,
        .bytes          = bytes,
        .bytes_handled  = &bytes_handled,
    };
    qemu_co_queue_init(&op->waiting_requests);

    switch (mirror_method) {
    case MIRROR_METHOD_COPY:
        co = qemu_coroutine_create(mirror_co_read, op);
        break;
    case MIRROR_METHOD_ZERO:
        co = qemu_coroutine_create(mirror_co_zero, op);
        break;
    case MIRROR_METHOD_DISCARD:
        co = qemu_coroutine_create(mirror_co_discard, op);
        break;
    default:
        abort();
    }
    op->co = co;

    QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
    qemu_coroutine_enter(co);
    /* At this point, ownership of op has been moved to the coroutine
     * and the object may already be freed */

    /* Assert that this value has been set */
    assert(bytes_handled >= 0);

    /* Same assertion as in mirror_co_read() (and for mirror_co_zero()
     * and mirror_co_discard(), bytes_handled == op->bytes, which
     * is the @bytes parameter given to this function) */
    assert(bytes_handled <= UINT_MAX);
    return bytes_handled;
}
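
/*
 * Note for callers of mirror_perform(): for MIRROR_METHOD_COPY the
 * returned byte count may differ from the @bytes argument, because
 * mirror_co_read() clamps the request to the buffer size and may move
 * the tail for COW alignment. The iteration loop therefore advances by
 * the returned value, i.e.
 *
 *     io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
 *     offset += io_bytes;
 *
 * so widened tails are not submitted twice.
 */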

static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->mirror_top_bs->backing->bs;
    MirrorOp *pseudo_op;
    int64_t offset;
    /* At least the first dirty chunk is mirrored in one iteration. */
    int nb_chunks = 1;
    bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
    int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);

    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    offset = bdrv_dirty_iter_next(s->dbi);
    if (offset < 0) {
        bdrv_set_dirty_iter(s->dbi, 0);
        offset = bdrv_dirty_iter_next(s->dbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(offset >= 0);
    }
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    /*
     * Wait for concurrent requests to @offset. The next loop will limit the
     * copied area based on in_flight_bitmap so we only copy an area that does
     * not overlap with concurrent in-flight requests. Still, we would like to
     * copy something, so wait until there are at least no more requests to the
     * very beginning of the area.
     */
    mirror_wait_on_conflicts(NULL, s, offset, 1);

    job_pause_point(&s->common.job);

    /* Find the number of consecutive dirty chunks following the first dirty
     * one, and wait for in flight requests in them. */
    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
    while (nb_chunks * s->granularity < s->buf_size) {
        int64_t next_dirty;
        int64_t next_offset = offset + nb_chunks * s->granularity;
        int64_t next_chunk = next_offset / s->granularity;
        if (next_offset >= s->bdev_length ||
            !bdrv_dirty_bitmap_get_locked(s->dirty_bitmap, next_offset)) {
            break;
        }
        if (test_bit(next_chunk, s->in_flight_bitmap)) {
            break;
        }

        next_dirty = bdrv_dirty_iter_next(s->dbi);
        if (next_dirty > next_offset || next_dirty < 0) {
            /* The bitmap iterator's cache is stale, refresh it */
            bdrv_set_dirty_iter(s->dbi, next_offset);
            next_dirty = bdrv_dirty_iter_next(s->dbi);
        }
        assert(next_dirty == next_offset);
        nb_chunks++;
    }

    /* Clear dirty bits before querying the block status, because
     * calling bdrv_block_status_above could yield - if some blocks are
     * marked dirty in this window, we need to know.
     */
    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
                                   nb_chunks * s->granularity);
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

    /* Before claiming an area in the in-flight bitmap, we have to
     * create a MirrorOp for it so that conflicting requests can wait
     * for it. mirror_perform() will create the real MirrorOps later,
     * for now we just create a pseudo operation that will wake up all
     * conflicting requests once all real operations have been
     * launched. */
    pseudo_op = g_new(MirrorOp, 1);
    *pseudo_op = (MirrorOp){
        .offset         = offset,
        .bytes          = nb_chunks * s->granularity,
        .is_pseudo_op   = true,
    };
    qemu_co_queue_init(&pseudo_op->waiting_requests);
    QTAILQ_INSERT_TAIL(&s->ops_in_flight, pseudo_op, next);

    bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
    while (nb_chunks > 0 && offset < s->bdev_length) {
        int ret;
        int64_t io_bytes;
        int64_t io_bytes_acct;
        MirrorMethod mirror_method = MIRROR_METHOD_COPY;

        assert(!(offset % s->granularity));
        WITH_GRAPH_RDLOCK_GUARD() {
            ret = bdrv_co_block_status_above(source, NULL, offset,
                                             nb_chunks * s->granularity,
                                             &io_bytes, NULL, NULL);
        }
        if (ret < 0) {
            io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
        } else if (ret & BDRV_BLOCK_DATA) {
            io_bytes = MIN(io_bytes, max_io_bytes);
        }

        io_bytes -= io_bytes % s->granularity;
        if (io_bytes < s->granularity) {
            io_bytes = s->granularity;
        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
            int64_t target_offset;
            int64_t target_bytes;
            WITH_GRAPH_RDLOCK_GUARD() {
                bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
                                          &target_offset, &target_bytes);
            }
            if (target_offset == offset &&
                target_bytes == io_bytes) {
                mirror_method = ret & BDRV_BLOCK_ZERO ?
                                    MIRROR_METHOD_ZERO :
                                    MIRROR_METHOD_DISCARD;
            }
        }

        while (s->in_flight >= MAX_IN_FLIGHT) {
            trace_mirror_yield_in_flight(s, offset, s->in_flight);
            mirror_wait_for_free_in_flight_slot(s);
        }

        if (s->ret < 0) {
            ret = 0;
            goto fail;
        }

        io_bytes = mirror_clip_bytes(s, offset, io_bytes);
        io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
        if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) {
            io_bytes_acct = 0;
        } else {
            io_bytes_acct = io_bytes;
        }
        assert(io_bytes);
        offset += io_bytes;
        nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
        block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct);
    }

fail:
    QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
    qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
    g_free(pseudo_op);
}
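
/*
 * To illustrate the method selection in mirror_iteration(): a dirty
 * region that bdrv_co_block_status_above() reports as BDRV_BLOCK_ZERO
 * (and that happens to cover whole target clusters, as verified via
 * bdrv_round_to_subclusters()) is handled with a zero write instead of
 * a read/write pair, and a region that is unallocated in the whole
 * source chain is discarded on the target. Only actual data takes the
 * copy path through s->buf.
 */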

static void mirror_free_init(MirrorBlockJob *s)
{
    int granularity = s->granularity;
    size_t buf_size = s->buf_size;
    uint8_t *buf = s->buf;

    assert(s->buf_free_count == 0);
    QSIMPLEQ_INIT(&s->buf_free);
    while (buf_size != 0) {
        MirrorBuffer *cur = (MirrorBuffer *)buf;
        QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
        s->buf_free_count++;
        buf_size -= granularity;
        buf += granularity;
    }
}

/* This is also used for the .pause callback. There is no matching
 * mirror_resume() because mirror_run() will begin iterating again
 * when the job is resumed.
 */
static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
{
    while (s->in_flight > 0) {
        mirror_wait_for_free_in_flight_slot(s);
    }
}

/**
 * mirror_exit_common: handle both abort() and prepare() cases.
 * for .prepare, returns 0 on success and -errno on failure.
 * for .abort cases, denoted by abort = true, MUST return 0.
 */
static int mirror_exit_common(Job *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
    BlockJob *bjob = &s->common;
    MirrorBDSOpaque *bs_opaque;
    AioContext *replace_aio_context = NULL;
    BlockDriverState *src;
    BlockDriverState *target_bs;
    BlockDriverState *mirror_top_bs;
    Error *local_err = NULL;
    bool abort = job->ret < 0;
    int ret = 0;

    GLOBAL_STATE_CODE();

    if (s->prepared) {
        return 0;
    }
    s->prepared = true;

    aio_context_acquire(qemu_get_aio_context());

    mirror_top_bs = s->mirror_top_bs;
    bs_opaque = mirror_top_bs->opaque;
    src = mirror_top_bs->backing->bs;
    target_bs = blk_bs(s->target);

    if (bdrv_chain_contains(src, target_bs)) {
        bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
    }

    bdrv_release_dirty_bitmap(s->dirty_bitmap);

    /* Make sure that the source BDS doesn't go away during bdrv_replace_node,
     * before we can call bdrv_drained_end */
    bdrv_ref(src);
    bdrv_ref(mirror_top_bs);
    bdrv_ref(target_bs);

    /*
     * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
     * inserting target_bs at s->to_replace, where we might not be able to get
     * these permissions.
     */
    blk_unref(s->target);
    s->target = NULL;

    /* We don't access the source any more. Dropping any WRITE/RESIZE is
     * required before it could become a backing file of target_bs. Not having
     * these permissions any more means that we can't allow any new requests on
     * mirror_top_bs from now on, so keep it drained. */
    bdrv_drained_begin(mirror_top_bs);
    bs_opaque->stop = true;

    bdrv_graph_rdlock_main_loop();
    bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                             &error_abort);
    bdrv_graph_rdunlock_main_loop();

    if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
        BlockDriverState *backing = s->is_none_mode ? src : s->base;
        BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);

        if (bdrv_cow_bs(unfiltered_target) != backing) {
            bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
            if (local_err) {
                error_report_err(local_err);
                local_err = NULL;
                ret = -EPERM;
            }
        }
    } else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
        assert(!bdrv_backing_chain_next(target_bs));
        ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL,
                                     "backing", &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            local_err = NULL;
        }
    }

    if (s->to_replace) {
        replace_aio_context = bdrv_get_aio_context(s->to_replace);
        aio_context_acquire(replace_aio_context);
    }

    if (s->should_complete && !abort) {
        BlockDriverState *to_replace = s->to_replace ?: src;
        bool ro = bdrv_is_read_only(to_replace);

        if (ro != bdrv_is_read_only(target_bs)) {
            bdrv_reopen_set_read_only(target_bs, ro, NULL);
        }

        /* The mirror job has no requests in flight any more, but we need to
         * drain potential other users of the BDS before changing the graph. */
        assert(s->in_drain);
        bdrv_drained_begin(target_bs);
        /*
         * Cannot use check_to_replace_node() here, because that would
         * check for an op blocker on @to_replace, and we have our own
         * there.
         *
         * TODO Pull out the writer lock from bdrv_replace_node() to here
         */
        bdrv_graph_rdlock_main_loop();
        if (bdrv_recurse_can_replace(src, to_replace)) {
            bdrv_replace_node(to_replace, target_bs, &local_err);
        } else {
            error_setg(&local_err, "Can no longer replace '%s' by '%s', "
                       "because it can no longer be guaranteed that doing so "
                       "would not lead to an abrupt change of visible data",
                       to_replace->node_name, target_bs->node_name);
        }
        bdrv_graph_rdunlock_main_loop();
        bdrv_drained_end(target_bs);
        if (local_err) {
            error_report_err(local_err);
            ret = -EPERM;
        }
    }
    if (s->to_replace) {
        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
        error_free(s->replace_blocker);
        bdrv_unref(s->to_replace);
    }
    if (replace_aio_context) {
        aio_context_release(replace_aio_context);
    }
    g_free(s->replaces);
    bdrv_unref(target_bs);

    /*
     * Remove the mirror filter driver from the graph. Before this, get rid of
     * the blockers on the intermediate nodes so that the resulting state is
     * valid.
     */
    block_job_remove_all_bdrv(bjob);
    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);

    bs_opaque->job = NULL;

    bdrv_drained_end(src);
    bdrv_drained_end(mirror_top_bs);
    s->in_drain = false;
    bdrv_unref(mirror_top_bs);
    bdrv_unref(src);

    aio_context_release(qemu_get_aio_context());

    return ret;
}
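
/*
 * Rough sketch of the graph transformation performed on successful
 * completion (sync=full with "replaces" unset; other configurations
 * differ in detail):
 *
 *     before:  guest -> mirror_top -> source        target
 *     after:   guest -> target                      (source unref'd)
 *
 * target_bs takes the place of @to_replace (the source by default), and
 * mirror_top is then dropped by the final bdrv_replace_node() above.
 */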

static int mirror_prepare(Job *job)
{
    return mirror_exit_common(job);
}

static void mirror_abort(Job *job)
{
    int ret = mirror_exit_common(job);
    assert(ret == 0);
}

static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
{
    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    if (now - s->last_pause_ns > BLOCK_JOB_SLICE_TIME) {
        s->last_pause_ns = now;
        job_sleep_ns(&s->common.job, 0);
    } else {
        job_pause_point(&s->common.job);
    }
}

static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
    int64_t offset;
    BlockDriverState *bs = s->mirror_top_bs->backing->bs;
    BlockDriverState *target_bs = blk_bs(s->target);
    int ret;
    int64_t count;

    if (s->zero_target) {
        if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
            bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
            return 0;
        }

        s->initial_zeroing_ongoing = true;
        for (offset = 0; offset < s->bdev_length; ) {
            int bytes = MIN(s->bdev_length - offset,
                            QEMU_ALIGN_DOWN(INT_MAX, s->granularity));

            mirror_throttle(s);

            if (job_is_cancelled(&s->common.job)) {
                s->initial_zeroing_ongoing = false;
                return 0;
            }

            if (s->in_flight >= MAX_IN_FLIGHT) {
                trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
                                   s->in_flight);
                mirror_wait_for_free_in_flight_slot(s);
                continue;
            }

            mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO);
            offset += bytes;
        }

        mirror_wait_for_all_io(s);
        s->initial_zeroing_ongoing = false;
    }

    /* First part, loop on the sectors and initialize the dirty bitmap. */
    for (offset = 0; offset < s->bdev_length; ) {
        /* Just to make sure we are not exceeding int limit. */
        int bytes = MIN(s->bdev_length - offset,
                        QEMU_ALIGN_DOWN(INT_MAX, s->granularity));

        mirror_throttle(s);

        if (job_is_cancelled(&s->common.job)) {
            return 0;
        }

        WITH_GRAPH_RDLOCK_GUARD() {
            ret = bdrv_co_is_allocated_above(bs, s->base_overlay, true, offset,
                                             bytes, &count);
        }
        if (ret < 0) {
            return ret;
        }

        assert(count);
        if (ret > 0) {
            bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
        }
        offset += count;
    }
    return 0;
}
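
/*
 * Summary of what mirror_dirty_init() leaves in the dirty bitmap
 * (illustrative, not exhaustive): with zero_target set and a target
 * that cannot efficiently write zeroes, the whole device is simply
 * marked dirty and copied; otherwise the device is pre-zeroed if
 * requested, and only the ranges allocated above s->base_overlay end
 * up dirty, which is what makes sync=top avoid copying backing data.
 */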

/* Called when going out of the streaming phase to flush the bulk of the
 * data to the medium, or just before completing.
 */
static int coroutine_fn mirror_flush(MirrorBlockJob *s)
{
    int ret = blk_co_flush(s->target);
    if (ret < 0) {
        if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
            s->ret = ret;
        }
    }
    return ret;
}

static int coroutine_fn mirror_run(Job *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
    BlockDriverState *bs = s->mirror_top_bs->backing->bs;
    MirrorBDSOpaque *mirror_top_opaque = s->mirror_top_bs->opaque;
    BlockDriverState *target_bs = blk_bs(s->target);
    bool need_drain = true;
    BlockDeviceIoStatus iostatus;
    int64_t length;
    int64_t target_length;
    BlockDriverInfo bdi;
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for an empty string */
    int ret = 0;

    if (job_is_cancelled(&s->common.job)) {
        goto immediate_exit;
    }

    bdrv_graph_co_rdlock();
    s->bdev_length = bdrv_co_getlength(bs);
    bdrv_graph_co_rdunlock();

    if (s->bdev_length < 0) {
        ret = s->bdev_length;
        goto immediate_exit;
    }

    target_length = blk_co_getlength(s->target);
    if (target_length < 0) {
        ret = target_length;
        goto immediate_exit;
    }

    /* Active commit must resize the base image if its size differs from the
     * active layer. */
    if (s->base == blk_bs(s->target)) {
        if (s->bdev_length > target_length) {
            ret = blk_co_truncate(s->target, s->bdev_length, false,
                                  PREALLOC_MODE_OFF, 0, NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
        }
    } else if (s->bdev_length != target_length) {
        error_setg(errp, "Source and target image have different sizes");
        ret = -EINVAL;
        goto immediate_exit;
    }

    if (s->bdev_length == 0) {
        /* Transition to the READY state and wait for complete. */
        job_transition_to_ready(&s->common.job);
        s->actively_synced = true;
        while (!job_cancel_requested(&s->common.job) && !s->should_complete) {
            job_yield(&s->common.job);
        }
        goto immediate_exit;
    }

    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
    s->in_flight_bitmap = bitmap_new(length);

    /* If we have no backing file yet in the destination, we cannot let
     * the destination do COW. Instead, we copy sectors around the
     * dirty data if needed. We need a bitmap to do that.
     */
    bdrv_get_backing_filename(target_bs, backing_filename,
                              sizeof(backing_filename));
    bdrv_graph_co_rdlock();
    if (!bdrv_co_get_info(target_bs, &bdi) && bdi.cluster_size) {
        s->target_cluster_size = bdi.cluster_size;
    } else {
        s->target_cluster_size = BDRV_SECTOR_SIZE;
    }
    bdrv_graph_co_rdunlock();
    if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
        s->granularity < s->target_cluster_size) {
        s->buf_size = MAX(s->buf_size, s->target_cluster_size);
        s->cow_bitmap = bitmap_new(length);
    }
    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);

    s->buf = qemu_try_blockalign(bs, s->buf_size);
    if (s->buf == NULL) {
        ret = -ENOMEM;
        goto immediate_exit;
    }

    mirror_free_init(s);

    s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    if (!s->is_none_mode) {
        ret = mirror_dirty_init(s);
        if (ret < 0 || job_is_cancelled(&s->common.job)) {
            goto immediate_exit;
        }
    }

    /*
     * Only now the job is fully initialised and mirror_top_bs should start
     * accessing it.
     */
    mirror_top_opaque->job = s;

    assert(!s->dbi);
    s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
    for (;;) {
        int64_t cnt, delta;
        bool should_complete;

        if (s->ret < 0) {
            ret = s->ret;
            goto immediate_exit;
        }

        job_pause_point(&s->common.job);

        if (job_is_cancelled(&s->common.job)) {
            ret = 0;
            goto immediate_exit;
        }

        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
         * the number of bytes currently being processed; together those are
         * the current remaining operation length */
        job_progress_set_remaining(&s->common.job,
                                   s->bytes_in_flight + cnt +
                                   s->active_write_bytes_in_flight);

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
         * We do so every BLOCK_JOB_SLICE_TIME nanoseconds, or when there is
         * an error, or when the source is clean, whichever comes first. */
        delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
        WITH_JOB_LOCK_GUARD() {
            iostatus = s->common.iostatus;
        }
        if (delta < BLOCK_JOB_SLICE_TIME &&
            iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                mirror_wait_for_free_in_flight_slot(s);
                continue;
            } else if (cnt != 0) {
                mirror_iteration(s);
            }
        }

        should_complete = false;
        if (s->in_flight == 0 && cnt == 0) {
            trace_mirror_before_flush(s);
            if (!job_is_ready(&s->common.job)) {
                if (mirror_flush(s) < 0) {
                    /* Go check s->ret. */
                    continue;
                }
                /* We're out of the streaming phase. From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion. This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                job_transition_to_ready(&s->common.job);
            }
            if (qatomic_read(&s->copy_mode) != MIRROR_COPY_MODE_BACKGROUND) {
                s->actively_synced = true;
            }

            should_complete = s->should_complete ||
                job_cancel_requested(&s->common.job);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs, so pause it now. Before deciding
             * whether to switch to target check one last time if I/O has
             * come in the meanwhile, and if not flush the data to disk.
             */
            trace_mirror_before_drain(s, cnt);

            s->in_drain = true;
            bdrv_drained_begin(bs);

            /* Must be zero because we are drained */
            assert(s->in_active_write_counter == 0);

            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
            if (cnt > 0 || mirror_flush(s) < 0) {
                bdrv_drained_end(bs);
                s->in_drain = false;
                continue;
            }

            /* The two disks are in sync. Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            need_drain = false;
            break;
        }

        if (job_is_ready(&s->common.job) && !should_complete) {
            if (s->in_flight == 0 && cnt == 0) {
                trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
                                          BLOCK_JOB_SLICE_TIME);
                job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME);
            }
        } else {
            block_job_ratelimit_sleep(&s->common);
        }
        s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

immediate_exit:
    if (s->in_flight > 0) {
        /* We get here only if something went wrong. Either the job failed,
         * or it was cancelled prematurely so that we do not guarantee that
         * the target is a copy of the source.
         */
        assert(ret < 0 || job_is_cancelled(&s->common.job));
        assert(need_drain);
        mirror_wait_for_all_io(s);
    }

    assert(s->in_flight == 0);
    qemu_vfree(s->buf);
    g_free(s->cow_bitmap);
    g_free(s->in_flight_bitmap);
    bdrv_dirty_iter_free(s->dbi);

    if (need_drain) {
        s->in_drain = true;
        bdrv_drained_begin(bs);
    }

    return ret;
}
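
/*
 * From a management perspective, the loop above corresponds to the usual
 * QMP life cycle of a mirror job: a BLOCK_JOB_READY event is emitted when
 * the job transitions to READY, after which, for example,
 *
 *     {"execute": "block-job-complete", "arguments": {"device": "job0"}}
 *
 * invokes mirror_complete() below and lets the job finish ("job0" is of
 * course just a placeholder id).
 */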

static void mirror_complete(Job *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);

    if (!job_is_ready(job)) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    /* block all operations on to_replace bs */
    if (s->replaces) {
        AioContext *replace_aio_context;

        s->to_replace = bdrv_find_node(s->replaces);
        if (!s->to_replace) {
            error_setg(errp, "Node name '%s' not found", s->replaces);
            return;
        }

        replace_aio_context = bdrv_get_aio_context(s->to_replace);
        aio_context_acquire(replace_aio_context);

        /* TODO Translate this into child freeze system. */
        error_setg(&s->replace_blocker,
                   "block device is in use by block-job-complete");
        bdrv_op_block_all(s->to_replace, s->replace_blocker);
        bdrv_ref(s->to_replace);

        aio_context_release(replace_aio_context);
    }

    s->should_complete = true;

    /* If the job is paused, it will be re-entered when it is resumed */
    WITH_JOB_LOCK_GUARD() {
        if (!job->paused) {
            job_enter_cond_locked(job, NULL);
        }
    }
}

static void coroutine_fn mirror_pause(Job *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);

    mirror_wait_for_all_io(s);
}

static bool mirror_drained_poll(BlockJob *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    /* If the job is neither paused nor cancelled, we can't be sure that it
     * won't issue more requests. We make an exception if we've reached this
     * point from one of our own drain sections, to avoid a deadlock waiting
     * for ourselves.
     */
    WITH_JOB_LOCK_GUARD() {
        if (!s->common.job.paused && !job_is_cancelled_locked(&job->job)
            && !s->in_drain) {
            return true;
        }
    }

    return !!s->in_flight;
}

static bool mirror_cancel(Job *job, bool force)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
    BlockDriverState *target = blk_bs(s->target);

    /*
     * Before the job is READY, we treat any cancellation like a
     * force-cancellation.
     */
    force = force || !job_is_ready(job);

    if (force) {
        bdrv_cancel_in_flight(target);
    }
    return force;
}

static bool commit_active_cancel(Job *job, bool force)
{
    /* Same as above in mirror_cancel() */
    return force || !job_is_ready(job);
}

static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts,
                          Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    BlockJobChangeOptionsMirror *change_opts = &opts->u.mirror;
    MirrorCopyMode current;

    /*
     * The implementation relies on the fact that copy_mode is only written
     * under the BQL. Otherwise, further synchronization would be required.
     */

    GLOBAL_STATE_CODE();

    if (qatomic_read(&s->copy_mode) == change_opts->copy_mode) {
        return;
    }

    if (change_opts->copy_mode != MIRROR_COPY_MODE_WRITE_BLOCKING) {
        error_setg(errp, "Change to copy mode '%s' is not implemented",
                   MirrorCopyMode_str(change_opts->copy_mode));
        return;
    }

    current = qatomic_cmpxchg(&s->copy_mode, MIRROR_COPY_MODE_BACKGROUND,
                              change_opts->copy_mode);
    if (current != MIRROR_COPY_MODE_BACKGROUND) {
        error_setg(errp, "Expected current copy mode '%s', got '%s'",
                   MirrorCopyMode_str(MIRROR_COPY_MODE_BACKGROUND),
                   MirrorCopyMode_str(current));
    }
}
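
/*
 * The only transition mirror_change() currently implements is
 * background -> write-blocking, e.g. via QMP (the job id "job0" is a
 * placeholder):
 *
 *     {"execute": "block-job-change",
 *      "arguments": {"id": "job0", "type": "mirror",
 *                    "copy-mode": "write-blocking"}}
 */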

static const BlockJobDriver mirror_job_driver = {
    .job_driver = {
        .instance_size          = sizeof(MirrorBlockJob),
        .job_type               = JOB_TYPE_MIRROR,
        .free                   = block_job_free,
        .user_resume            = block_job_user_resume,
        .run                    = mirror_run,
        .prepare                = mirror_prepare,
        .abort                  = mirror_abort,
        .pause                  = mirror_pause,
        .complete               = mirror_complete,
        .cancel                 = mirror_cancel,
    },
    .drained_poll           = mirror_drained_poll,
    .change                 = mirror_change,
};

static const BlockJobDriver commit_active_job_driver = {
    .job_driver = {
        .instance_size          = sizeof(MirrorBlockJob),
        .job_type               = JOB_TYPE_COMMIT,
        .free                   = block_job_free,
        .user_resume            = block_job_user_resume,
        .run                    = mirror_run,
        .prepare                = mirror_prepare,
        .abort                  = mirror_abort,
        .pause                  = mirror_pause,
        .complete               = mirror_complete,
        .cancel                 = commit_active_cancel,
    },
    .drained_poll           = mirror_drained_poll,
};

static void coroutine_fn
do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
                     uint64_t offset, uint64_t bytes,
                     QEMUIOVector *qiov, int flags)
{
    int ret;
    size_t qiov_offset = 0;
    int64_t bitmap_offset, bitmap_end;

    if (!QEMU_IS_ALIGNED(offset, job->granularity) &&
        bdrv_dirty_bitmap_get(job->dirty_bitmap, offset))
    {
        /*
         * Dirty unaligned padding: ignore it.
         *
         * Reasoning:
         * 1. If we copy it, we can't reset the corresponding bit in
         *    dirty_bitmap, as there may be some "dirty" bytes still not
         *    copied.
         * 2. It's already dirty, so skipping it does not diverge mirror
         *    progress.
         *
         * Note that because of this, a guest write may contribute nothing
         * to mirror convergence. That is not a problem: we still have the
         * background mirroring process, and if, under some bad circumstances
         * (high guest I/O load), the background process starves, we will not
         * converge anyway, even if every guest write contributed, as the
         * guest is not guaranteed to rewrite the whole disk.
         */
        qiov_offset = QEMU_ALIGN_UP(offset, job->granularity) - offset;
        if (bytes <= qiov_offset) {
            /* nothing to do after shrink */
            return;
        }
        offset += qiov_offset;
        bytes -= qiov_offset;
    }

    if (!QEMU_IS_ALIGNED(offset + bytes, job->granularity) &&
        bdrv_dirty_bitmap_get(job->dirty_bitmap, offset + bytes - 1))
    {
        uint64_t tail = (offset + bytes) % job->granularity;

        if (bytes <= tail) {
            /* nothing to do after shrink */
            return;
        }
        bytes -= tail;
    }

    /*
     * Tails are either clean or shrunk, so for bitmap resetting
     * we safely align the range down.
     */
    bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity);
    bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity);
    if (bitmap_offset < bitmap_end) {
        bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
                                bitmap_end - bitmap_offset);
    }

    job_progress_increase_remaining(&job->common.job, bytes);
    job->active_write_bytes_in_flight += bytes;

    switch (method) {
    case MIRROR_METHOD_COPY:
        ret = blk_co_pwritev_part(job->target, offset, bytes,
                                  qiov, qiov_offset, flags);
        break;

    case MIRROR_METHOD_ZERO:
        assert(!qiov);
        ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags);
        break;

    case MIRROR_METHOD_DISCARD:
        assert(!qiov);
        ret = blk_co_pdiscard(job->target, offset, bytes);
        break;

    default:
        abort();
    }

    job->active_write_bytes_in_flight -= bytes;
    if (ret >= 0) {
        job_progress_update(&job->common.job, bytes);
    } else {
        BlockErrorAction action;

        /*
         * We failed, so we should mark dirty the whole area, aligned up.
         * Note that we don't care about shrunk tails if any: they were dirty
         * at function start, and they must be still dirty, as we've locked
         * the region for in-flight op.
         */
        bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity);
        bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
        bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
                              bitmap_end - bitmap_offset);
        job->actively_synced = false;

        action = mirror_error_action(job, false, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT) {
            if (!job->ret) {
                job->ret = ret;
            }
        }
    }
}
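
/*
 * Worked example for the head/tail handling in do_sync_target_write(),
 * assuming a 64 KiB granularity: a guest write of [60 KiB, 130 KiB)
 * whose first and last chunks are still dirty is shrunk to the aligned
 * middle [64 KiB, 128 KiB). Only chunk 1 can be reset in the dirty
 * bitmap; the skipped 4 KiB head and 2 KiB tail remain dirty and are
 * left to the background copy.
 */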

static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
                                                   uint64_t offset,
                                                   uint64_t bytes)
{
    MirrorOp *op;
    uint64_t start_chunk = offset / s->granularity;
    uint64_t end_chunk = DIV_ROUND_UP(offset + bytes, s->granularity);

    op = g_new(MirrorOp, 1);
    *op = (MirrorOp){
        .s                  = s,
        .offset             = offset,
        .bytes              = bytes,
        .is_active_write    = true,
        .is_in_flight       = true,
        .co                 = qemu_coroutine_self(),
    };
    qemu_co_queue_init(&op->waiting_requests);
    QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);

    s->in_active_write_counter++;

    /*
     * Wait for concurrent requests affecting the area. If there are already
     * running requests that are copying off now-to-be stale data in the area,
     * we must wait for them to finish before we begin writing fresh data to
     * the target so that the write operations appear in the correct order.
     * Note that background requests (see mirror_iteration()) in contrast only
     * wait for conflicting requests at the start of the dirty area, and then
     * (based on the in_flight_bitmap) truncate the area to copy so it will not
     * conflict with any requests beyond that. For active writes, however, we
     * cannot truncate that area. The request from our parent must be blocked
     * until the area is copied in full. Therefore, we must wait for the whole
     * area to become free of concurrent requests.
     */
    mirror_wait_on_conflicts(op, s, offset, bytes);

    bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);

    return op;
}

static void coroutine_fn GRAPH_RDLOCK active_write_settle(MirrorOp *op)
{
    uint64_t start_chunk = op->offset / op->s->granularity;
    uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
                                      op->s->granularity);

    if (!--op->s->in_active_write_counter && op->s->actively_synced) {
        BdrvChild *source = op->s->mirror_top_bs->backing;

        if (QLIST_FIRST(&source->bs->parents) == source &&
            QLIST_NEXT(source, next_parent) == NULL)
        {
            /* Assert that we are back in sync once all active write
             * operations are settled.
             * Note that we can only assert this if the mirror node
             * is the source node's only parent. */
            assert(!bdrv_get_dirty_count(op->s->dirty_bitmap));
        }
    }
    bitmap_clear(op->s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
    QTAILQ_REMOVE(&op->s->ops_in_flight, op, next);
    qemu_co_queue_restart_all(&op->waiting_requests);
    g_free(op);
}

static int coroutine_fn GRAPH_RDLOCK
bdrv_mirror_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                       QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}

static bool should_copy_to_target(MirrorBDSOpaque *s)
{
    return s->job && s->job->ret >= 0 &&
        !job_is_cancelled(&s->job->common.job) &&
        qatomic_read(&s->job->copy_mode) == MIRROR_COPY_MODE_WRITE_BLOCKING;
}

static int coroutine_fn GRAPH_RDLOCK
bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
                         bool copy_to_target, uint64_t offset, uint64_t bytes,
                         QEMUIOVector *qiov, int flags)
{
    MirrorOp *op = NULL;
    MirrorBDSOpaque *s = bs->opaque;
    int ret = 0;

    if (copy_to_target) {
        op = active_write_prepare(s->job, offset, bytes);
    }

    switch (method) {
    case MIRROR_METHOD_COPY:
        ret = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
        break;

    case MIRROR_METHOD_ZERO:
        ret = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
        break;

    case MIRROR_METHOD_DISCARD:
        ret = bdrv_co_pdiscard(bs->backing, offset, bytes);
        break;

    default:
        abort();
    }

    if (!copy_to_target && s->job && s->job->dirty_bitmap) {
        s->job->actively_synced = false;
        bdrv_set_dirty_bitmap(s->job->dirty_bitmap, offset, bytes);
    }

    if (ret < 0) {
        goto out;
    }

    if (copy_to_target) {
        do_sync_target_write(s->job, method, offset, bytes, qiov, flags);
    }

out:
    if (copy_to_target) {
        active_write_settle(op);
    }
    return ret;
}

static int coroutine_fn GRAPH_RDLOCK
bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                        QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    QEMUIOVector bounce_qiov;
    void *bounce_buf;
    int ret = 0;
    bool copy_to_target = should_copy_to_target(bs->opaque);

    if (copy_to_target) {
        /* The guest might concurrently modify the data to write; but
         * the data on source and destination must match, so we have
         * to use a bounce buffer if we are going to write to the
         * target now.
         */
        bounce_buf = qemu_blockalign(bs, bytes);
        iov_to_buf_full(qiov->iov, qiov->niov, 0, bounce_buf, bytes);

        qemu_iovec_init(&bounce_qiov, 1);
        qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
        qiov = &bounce_qiov;

        flags &= ~BDRV_REQ_REGISTERED_BUF;
    }

    ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, copy_to_target,
                                   offset, bytes, qiov, flags);

    if (copy_to_target) {
        qemu_iovec_destroy(&bounce_qiov);
        qemu_vfree(bounce_buf);
    }

    return ret;
}

static int coroutine_fn GRAPH_RDLOCK bdrv_mirror_top_flush(BlockDriverState *bs)
{
    if (bs->backing == NULL) {
        /* we can be here after failed bdrv_append in mirror_start_job */
        return 0;
    }
    return bdrv_co_flush(bs->backing->bs);
}

static int coroutine_fn GRAPH_RDLOCK
bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                              int64_t bytes, BdrvRequestFlags flags)
{
    bool copy_to_target = should_copy_to_target(bs->opaque);
    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, copy_to_target,
                                    offset, bytes, NULL, flags);
}

static int coroutine_fn GRAPH_RDLOCK
bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    bool copy_to_target = should_copy_to_target(bs->opaque);
    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, copy_to_target,
                                    offset, bytes, NULL, 0);
}

static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs)
{
    if (bs->backing == NULL) {
        /* we can be here after failed bdrv_attach_child in
         * bdrv_set_backing_hd */
        return;
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
            bs->backing->bs->filename);
}

static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
                                       BdrvChildRole role,
                                       BlockReopenQueue *reopen_queue,
                                       uint64_t perm, uint64_t shared,
                                       uint64_t *nperm, uint64_t *nshared)
{
    MirrorBDSOpaque *s = bs->opaque;

    if (s->stop) {
        /*
         * If the job is to be stopped, we do not need to forward
         * anything to the real image.
         */
        *nperm = 0;
        *nshared = BLK_PERM_ALL;
        return;
    }

    bdrv_default_perms(bs, c, role, reopen_queue,
                       perm, shared, nperm, nshared);

    if (s->is_commit) {
        /*
         * For commit jobs, we cannot take CONSISTENT_READ, because
         * that permission is unshared for everything above the base
         * node (except for filters on the base node).
         * We also have to force-share the WRITE permission, or
         * otherwise we would block ourselves at the base node (if
         * writes are blocked for a node, they are also blocked for
         * its backing file).
         * (We could also share RESIZE, because it may be needed for
         * the target if its size is less than the top node's; but
         * bdrv_default_perms_for_cow() automatically shares RESIZE
         * for backing nodes if WRITE is shared, so there is no need
         * to do it here.)
         */
        *nperm &= ~BLK_PERM_CONSISTENT_READ;
        *nshared |= BLK_PERM_WRITE;
    }
}

/* Dummy node that provides consistent read to its users without requiring it
 * from its backing file and that allows writes on the backing file chain. */
static BlockDriver bdrv_mirror_top = {
    .format_name                = "mirror_top",
    .bdrv_co_preadv             = bdrv_mirror_top_preadv,
    .bdrv_co_pwritev            = bdrv_mirror_top_pwritev,
    .bdrv_co_pwrite_zeroes      = bdrv_mirror_top_pwrite_zeroes,
    .bdrv_co_pdiscard           = bdrv_mirror_top_pdiscard,
    .bdrv_co_flush              = bdrv_mirror_top_flush,
    .bdrv_refresh_filename      = bdrv_mirror_top_refresh_filename,
    .bdrv_child_perm            = bdrv_mirror_top_child_perm,

    .is_filter                  = true,
    .filtered_child_is_backing  = true,
};
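
/*
 * Sketch of the graph while the job runs (sync=full, illustrative):
 *
 *     guest device
 *          |
 *     mirror_top (this filter)          target
 *          |                               ^
 *       source  ---- background copy -----+
 *
 * Guest writes pass through mirror_top, which either marks the written
 * range dirty (background mode) or copies it to the target right away
 * (write-blocking mode, see bdrv_mirror_top_do_write()).
 */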

static BlockJob *mirror_start_job(
                             const char *job_id, BlockDriverState *bs,
                             int creation_flags, BlockDriverState *target,
                             const char *replaces, int64_t speed,
                             uint32_t granularity, int64_t buf_size,
                             BlockMirrorBackingMode backing_mode,
                             bool zero_target,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
                             bool unmap,
                             BlockCompletionFunc *cb,
                             void *opaque,
                             const BlockJobDriver *driver,
                             bool is_none_mode, BlockDriverState *base,
                             bool auto_complete, const char *filter_node_name,
                             bool is_mirror, MirrorCopyMode copy_mode,
                             Error **errp)
{
    MirrorBlockJob *s;
    MirrorBDSOpaque *bs_opaque;
    BlockDriverState *mirror_top_bs;
    bool target_is_backing;
    uint64_t target_perms, target_shared_perms;
    int ret;

    GLOBAL_STATE_CODE();

    if (granularity == 0) {
        granularity = bdrv_get_default_bitmap_granularity(target);
    }

    assert(is_power_of_2(granularity));

    if (buf_size < 0) {
        error_setg(errp, "Invalid parameter 'buf-size'");
        return NULL;
    }

    if (buf_size == 0) {
        buf_size = DEFAULT_MIRROR_BUF_SIZE;
    }

    if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) {
        error_setg(errp, "Can't mirror node into itself");
        return NULL;
    }

    target_is_backing = bdrv_chain_contains(bs, target);

    /* In the case of active commit, add dummy driver to provide consistent
     * reads on the top, while disabling it in the intermediate nodes, and make
     * the backing chain writable. */
    mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
                                         BDRV_O_RDWR, errp);
    if (mirror_top_bs == NULL) {
        return NULL;
    }
    if (!filter_node_name) {
        mirror_top_bs->implicit = true;
    }

    /* So that we can always drop this node */
    mirror_top_bs->never_freeze = true;

    mirror_top_bs->total_sectors = bs->total_sectors;
    mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
    mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
                                          BDRV_REQ_NO_FALLBACK;
    bs_opaque = g_new0(MirrorBDSOpaque, 1);
    mirror_top_bs->opaque = bs_opaque;

    bs_opaque->is_commit = target_is_backing;

    bdrv_drained_begin(bs);
    ret = bdrv_append(mirror_top_bs, bs, errp);
    bdrv_drained_end(bs);

    if (ret < 0) {
        bdrv_unref(mirror_top_bs);
        return NULL;
    }

    /* Make sure that the source is not resized while the job is running */
    s = block_job_create(job_id, driver, NULL, mirror_top_bs,
                         BLK_PERM_CONSISTENT_READ,
                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                         BLK_PERM_WRITE, speed,
                         creation_flags, cb, opaque, errp);
    if (!s) {
        goto fail;
    }

    /* The block job now has a reference to this node */
    bdrv_unref(mirror_top_bs);

    s->mirror_top_bs = mirror_top_bs;

    /* No resize for the target either; while the mirror is still running, a
     * consistent read isn't necessarily possible. We could possibly allow
     * writes and graph modifications, though it would likely defeat the
     * purpose of a mirror, so leave them blocked for now.
     *
     * In the case of active commit, things look a bit different, though,
     * because the target is an already populated backing file in active use.
     * We can allow anything except resize there. */

    target_perms = BLK_PERM_WRITE;
    target_shared_perms = BLK_PERM_WRITE_UNCHANGED;

    if (target_is_backing) {
        int64_t bs_size, target_size;
        bs_size = bdrv_getlength(bs);
        if (bs_size < 0) {
            error_setg_errno(errp, -bs_size,
                             "Could not inquire top image size");
            goto fail;
        }

        target_size = bdrv_getlength(target);
        if (target_size < 0) {
            error_setg_errno(errp, -target_size,
                             "Could not inquire base image size");
            goto fail;
        }

        if (target_size < bs_size) {
            target_perms |= BLK_PERM_RESIZE;
        }

        target_shared_perms |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
    } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
        /*
         * We may want to allow this in the future, but it would
         * require taking some extra care.
         */
        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
                   "source's backing chain");
        goto fail;
    }

    s->target = blk_new(s->common.job.aio_context,
                        target_perms, target_shared_perms);
    ret = blk_insert_bs(s->target, target, errp);
    if (ret < 0) {
        goto fail;
    }
    if (is_mirror) {
        /* XXX: Mirror target could be an NBD server of target QEMU in the case
         * of non-shared block migration. To allow migration completion, we
         * have to allow "inactivate" of the target BB. When that happens, we
         * know the job is drained, and the vcpus are stopped, so no write
         * operation will be performed. Block layer already has assertions to
         * ensure that. */
        blk_set_force_allow_inactivate(s->target);
    }
    blk_set_allow_aio_context_change(s->target, true);
    blk_set_disable_request_queuing(s->target, true);

    s->replaces = g_strdup(replaces);
    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->is_none_mode = is_none_mode;
    s->backing_mode = backing_mode;
    s->zero_target = zero_target;
    qatomic_set(&s->copy_mode, copy_mode);
    s->base = base;
    s->base_overlay = bdrv_find_overlay(bs, base);
    s->granularity = granularity;
    s->buf_size = ROUND_UP(buf_size, granularity);
    s->unmap = unmap;
    if (auto_complete) {
        s->should_complete = true;
    }

    s->dirty_bitmap = bdrv_create_dirty_bitmap(s->mirror_top_bs, granularity,
                                               NULL, errp);
    if (!s->dirty_bitmap) {
        goto fail;
    }

    /*
     * The dirty bitmap is set by bdrv_mirror_top_do_write() when not in active
     * mode.
     */
    bdrv_disable_dirty_bitmap(s->dirty_bitmap);

    ret = block_job_add_bdrv(&s->common, "source", bs, 0,
                             BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
                             BLK_PERM_CONSISTENT_READ,
                             errp);
    if (ret < 0) {
        goto fail;
    }

    /* Required permissions are already taken with blk_new() */
    block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
                       &error_abort);

    /* In commit_active_start() all intermediate nodes disappear, so
     * any jobs in them must be blocked */
    if (target_is_backing) {
        BlockDriverState *iter, *filtered_target;
        uint64_t iter_shared_perms;

        /*
         * The topmost node with
         * bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target)
         */
        filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target));

        assert(bdrv_skip_filters(filtered_target) ==
               bdrv_skip_filters(target));

        /*
         * XXX BLK_PERM_WRITE needs to be allowed so we don't block
         * ourselves at s->base (if writes are blocked for a node, they are
         * also blocked for its backing file). The other options would be a
         * second filter driver above s->base (== target).
         */
        iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;

        for (iter = bdrv_filter_or_cow_bs(bs); iter != target;
             iter = bdrv_filter_or_cow_bs(iter))
        {
            if (iter == filtered_target) {
                /*
                 * From here on, all nodes are filters on the base.
                 * This allows us to share BLK_PERM_CONSISTENT_READ.
                 */
                iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
            }

            ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                     iter_shared_perms, errp);
            if (ret < 0) {
                goto fail;
            }
        }

        if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
            goto fail;
        }
    }

    QTAILQ_INIT(&s->ops_in_flight);

    trace_mirror_start(bs, s, opaque);
    job_start(&s->common.job);

    return &s->common;

fail:
    if (s) {
        /* Make sure this BDS does not go away until we have completed the
         * graph changes below */
        bdrv_ref(mirror_top_bs);

        g_free(s->replaces);
        blk_unref(s->target);
        bs_opaque->job = NULL;
        if (s->dirty_bitmap) {
            bdrv_release_dirty_bitmap(s->dirty_bitmap);
        }
        job_early_fail(&s->common.job);
    }

    bs_opaque->stop = true;
    bdrv_graph_rdlock_main_loop();
    bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                             &error_abort);
    bdrv_graph_rdunlock_main_loop();
    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);

    bdrv_unref(mirror_top_bs);

    return NULL;
}

void mirror_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, const char *replaces,
                  int creation_flags, int64_t speed,
                  uint32_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  bool zero_target,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  bool unmap, const char *filter_node_name,
                  MirrorCopyMode copy_mode, Error **errp)
{
    bool is_none_mode;
    BlockDriverState *base;

    GLOBAL_STATE_CODE();

    if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
        (mode == MIRROR_SYNC_MODE_BITMAP)) {
        error_setg(errp, "Sync mode '%s' not supported",
                   MirrorSyncMode_str(mode));
        return;
    }
    is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
    base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
    mirror_start_job(job_id, bs, creation_flags, target, replaces,
                     speed, granularity, buf_size, backing_mode, zero_target,
                     on_source_error, on_target_error, unmap, NULL, NULL,
                     &mirror_job_driver, is_none_mode, base, false,
                     filter_node_name, true, copy_mode, errp);
}
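
/*
 * Typical way to reach mirror_start() from QMP (all values are
 * placeholders):
 *
 *     {"execute": "drive-mirror",
 *      "arguments": {"device": "drive0", "target": "/tmp/copy.qcow2",
 *                    "sync": "full", "format": "qcow2"}}
 *
 * blockdev-mirror is the node-name based variant; both end up here.
 */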

BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
                              BlockDriverState *base, int creation_flags,
                              int64_t speed, BlockdevOnError on_error,
                              const char *filter_node_name,
                              BlockCompletionFunc *cb, void *opaque,
                              bool auto_complete, Error **errp)
{
    bool base_read_only;
    BlockJob *job;

    GLOBAL_STATE_CODE();

    base_read_only = bdrv_is_read_only(base);

    if (base_read_only) {
        if (bdrv_reopen_set_read_only(base, false, errp) < 0) {
            return NULL;
        }
    }

    job = mirror_start_job(
                     job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                     MIRROR_LEAVE_BACKING_CHAIN, false,
                     on_error, on_error, true, cb, opaque,
                     &commit_active_job_driver, false, base, auto_complete,
                     filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
                     errp);
    if (!job) {
        goto error_restore_flags;
    }

    return job;

error_restore_flags:
    /* ignore error and errp for bdrv_reopen, because we want to propagate
     * the original error */
    if (base_read_only) {
        bdrv_reopen_set_read_only(base, true, NULL);
    }
    return NULL;
}