/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "hw/hw.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "migration/block.h"
#include "migration/migration.h"
#include "sysemu/blockdev.h"
#include "sysemu/block-backend.h"

#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04
#define BLK_MIG_FLAG_ZERO_BLOCK         0x08

#define MAX_IS_ALLOCATED_SEARCH 65536

#define MAX_INFLIGHT_IO 512

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    /* Written during setup phase. Can be read without a lock. */
    BlockDriverState *bs;
    int shared_base;
    int64_t total_sectors;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;

    /* Only used by migration thread. Does not need a lock. */
    int bulk_completed;
    int64_t cur_sector;
    int64_t cur_dirty;

    /* Protected by block migration lock. */
    unsigned long *aio_bitmap;
    int64_t completed_sectors;
    BdrvDirtyBitmap *dirty_bitmap;
    Error *blocker;
} BlkMigDevState;

typedef struct BlkMigBlock {
    /* Only used by migration thread. */
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockAIOCB *aiocb;

    /* Protected by block migration lock. */
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    /* Written during setup phase. Can be read without a lock. */
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    int64_t total_sector_sum;
    bool zero_blocks;

    /* Protected by lock. */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;

    /* Only used by migration thread. Does not need a lock. */
    int transferred;
    int prev_progress;
    int bulk_completed;

    /* Lock must be taken _inside_ the iothread lock. */
    QemuMutex lock;
} BlkMigState;

static BlkMigState block_mig_state;

static void blk_mig_lock(void)
{
    qemu_mutex_lock(&block_mig_state.lock);
}

static void blk_mig_unlock(void)
{
    qemu_mutex_unlock(&block_mig_state.lock);
}
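/* On-the-wire format, as produced by blk_send() below and consumed by
 * block_load() on the destination:
 *
 *   be64: (sector number << BDRV_SECTOR_BITS) | flags
 *
 * Because BDRV_SECTOR_BITS is 9 and chunks are BLOCK_SIZE (1 MiB)
 * aligned, the low 9 bits of the word are free to carry the
 * BLK_MIG_FLAG_* values. A DEVICE_BLOCK chunk is followed by a
 * one-byte device name length, the device name itself, and then
 * BLOCK_SIZE bytes of data, unless BLK_MIG_FLAG_ZERO_BLOCK is set,
 * in which case the data is omitted. A PROGRESS chunk carries the
 * completion percentage in the sector-number field instead.
 */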
121 */ 122 123 static void blk_send(QEMUFile *f, BlkMigBlock * blk) 124 { 125 int len; 126 uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK; 127 128 if (block_mig_state.zero_blocks && 129 buffer_is_zero(blk->buf, BLOCK_SIZE)) { 130 flags |= BLK_MIG_FLAG_ZERO_BLOCK; 131 } 132 133 /* sector number and flags */ 134 qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS) 135 | flags); 136 137 /* device name */ 138 len = strlen(bdrv_get_device_name(blk->bmds->bs)); 139 qemu_put_byte(f, len); 140 qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len); 141 142 /* if a block is zero we need to flush here since the network 143 * bandwidth is now a lot higher than the storage device bandwidth. 144 * thus if we queue zero blocks we slow down the migration */ 145 if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { 146 qemu_fflush(f); 147 return; 148 } 149 150 qemu_put_buffer(f, blk->buf, BLOCK_SIZE); 151 } 152 153 int blk_mig_active(void) 154 { 155 return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list); 156 } 157 158 uint64_t blk_mig_bytes_transferred(void) 159 { 160 BlkMigDevState *bmds; 161 uint64_t sum = 0; 162 163 blk_mig_lock(); 164 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { 165 sum += bmds->completed_sectors; 166 } 167 blk_mig_unlock(); 168 return sum << BDRV_SECTOR_BITS; 169 } 170 171 uint64_t blk_mig_bytes_remaining(void) 172 { 173 return blk_mig_bytes_total() - blk_mig_bytes_transferred(); 174 } 175 176 uint64_t blk_mig_bytes_total(void) 177 { 178 BlkMigDevState *bmds; 179 uint64_t sum = 0; 180 181 QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { 182 sum += bmds->total_sectors; 183 } 184 return sum << BDRV_SECTOR_BITS; 185 } 186 187 188 /* Called with migration lock held. */ 189 190 static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector) 191 { 192 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; 193 194 if (sector < bdrv_nb_sectors(bmds->bs)) { 195 return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] & 196 (1UL << (chunk % (sizeof(unsigned long) * 8)))); 197 } else { 198 return 0; 199 } 200 } 201 202 /* Called with migration lock held. */ 203 204 static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num, 205 int nb_sectors, int set) 206 { 207 int64_t start, end; 208 unsigned long val, idx, bit; 209 210 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK; 211 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK; 212 213 for (; start <= end; start++) { 214 idx = start / (sizeof(unsigned long) * 8); 215 bit = start % (sizeof(unsigned long) * 8); 216 val = bmds->aio_bitmap[idx]; 217 if (set) { 218 val |= 1UL << bit; 219 } else { 220 val &= ~(1UL << bit); 221 } 222 bmds->aio_bitmap[idx] = val; 223 } 224 } 225 226 static void alloc_aio_bitmap(BlkMigDevState *bmds) 227 { 228 BlockDriverState *bs = bmds->bs; 229 int64_t bitmap_size; 230 231 bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1; 232 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8; 233 234 bmds->aio_bitmap = g_malloc0(bitmap_size); 235 } 236 237 /* Never hold migration lock when yielding to the main loop! 
/* Never hold migration lock when yielding to the main loop! */

static void blk_mig_read_cb(void *opaque, int ret)
{
    BlkMigBlock *blk = opaque;

    blk_mig_lock();
    blk->ret = ret;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
    blk_mig_unlock();
}

/* Called with no lock taken. */

static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        qemu_mutex_lock_iothread();
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
        qemu_mutex_unlock_iothread();
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_new(BlkMigBlock, 1);
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    blk_mig_lock();
    block_mig_state.submitted++;
    blk_mig_unlock();

    qemu_mutex_lock_iothread();
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);

    bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
    return (bmds->cur_sector >= total_sectors);
}
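/* Each call to mig_save_device_bulk() above submits at most one 1 MiB
 * chunk: the cursor is rounded down to a chunk boundary, the chunk is
 * read with bdrv_aio_readv(), and the corresponding range is cleared in
 * the dirty bitmap so that guest writes landing in it afterwards are
 * picked up again by the dirty phase. With a shared base image,
 * unallocated ranges are skipped and only locally allocated clusters
 * are transferred.
 */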
/* Called with iothread lock taken. */

static int set_dirty_tracking(void)
{
    BlkMigDevState *bmds;
    int ret;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
                                                      NULL, NULL);
        if (!bmds->dirty_bitmap) {
            ret = -errno;
            goto fail;
        }
    }
    return 0;

fail:
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->dirty_bitmap) {
            bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
        }
    }
    return ret;
}

static void unset_dirty_tracking(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
    }
}

static void init_blk_migration(QEMUFile *f)
{
    BlockDriverState *bs;
    BlkMigDevState *bmds;
    int64_t sectors;

    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
    block_mig_state.zero_blocks = migrate_zero_blocks();

    for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
        if (bdrv_is_read_only(bs)) {
            continue;
        }

        sectors = bdrv_nb_sectors(bs);
        if (sectors <= 0) {
            return;
        }

        bmds = g_new0(BlkMigDevState, 1);
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        error_setg(&bmds->blocker, "block device is in use by migration");
        bdrv_op_block_all(bs, bmds->blocker);
        bdrv_ref(bs);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bdrv_get_device_name(bs));
        } else {
            DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

/* Called with no lock taken. */

static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}
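/* The dirty phase below scans each device's dirty bitmap from cur_dirty
 * upwards and re-sends at most one dirty chunk per call. If an AIO read
 * for the same chunk is still in flight (tracked in aio_bitmap), the
 * device is drained first, so that the earlier read completes before
 * the chunk is read again and its result cannot arrive out of order.
 */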
/* Called with iothread lock taken. */

static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        blk_mig_lock();
        if (bmds_aio_inflight(bmds, sector)) {
            blk_mig_unlock();
            bdrv_drain(bmds->bs);
        } else {
            blk_mig_unlock();
        }
        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = g_new(BlkMigBlock, 1);
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb,
                                            blk);

                blk_mig_lock();
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
                blk_mig_unlock();
            } else {
                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* Called with iothread lock taken.
 *
 * return value:
 * 0: too much data for max_downtime
 * 1: little enough data for max_downtime
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        ret = mig_save_device_dirty(f, bmds, is_async);
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}

/* Called with no locks taken. */

static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        blk_mig_unlock();
        blk_send(f, blk);
        blk_mig_lock();

        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }
    blk_mig_unlock();

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}
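/* Convergence is estimated from the dirty bitmaps: get_remaining_dirty()
 * below converts the per-device dirty sector counts into bytes, and
 * block_save_pending() adds the buffers still in flight (submitted) and
 * those read but not yet sent (read_done), each worth BLOCK_SIZE bytes.
 */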
/* Called with iothread lock taken. */

static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
    }

    return dirty << BDRV_SECTOR_BITS;
}

/* Called with iothread lock taken. */

static void block_migration_cleanup(void *opaque)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    bdrv_drain_all();

    unset_dirty_tracking();

    blk_mig_lock();
    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_op_unblock_all(bmds->bs, bmds->blocker);
        error_free(bmds->blocker);
        bdrv_unref(bmds->bs);
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
    blk_mig_unlock();
}

static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    qemu_mutex_lock_iothread();
    init_blk_migration(f);

    /* start tracking dirty blocks */
    ret = set_dirty_tracking();

    if (ret) {
        qemu_mutex_unlock_iothread();
        return ret;
    }

    qemu_mutex_unlock_iothread();

    ret = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ret;
}

static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int64_t last_ftell = qemu_ftell(f);
    int64_t delta_ftell;

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    blk_mig_lock();
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f) &&
           (block_mig_state.submitted +
            block_mig_state.read_done) <
           MAX_INFLIGHT_IO) {
        blk_mig_unlock();
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
            ret = 0;
        } else {
            /* Always called with iothread lock taken for
             * simplicity; block_save_complete also calls it.
             */
            qemu_mutex_lock_iothread();
            ret = blk_mig_save_dirty_block(f, 1);
            qemu_mutex_unlock_iothread();
        }
        if (ret < 0) {
            return ret;
        }
        blk_mig_lock();
        if (ret != 0) {
            /* no more dirty blocks */
            break;
        }
    }
    blk_mig_unlock();

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
    delta_ftell = qemu_ftell(f) - last_ftell;
    if (delta_ftell > 0) {
        return 1;
    } else if (delta_ftell < 0) {
        return -1;
    } else {
        return 0;
    }
}
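/* By the time the precopy-complete handler below runs, the source VM
 * has been stopped, so the guest can no longer dirty blocks. The
 * remaining dirty chunks are therefore read synchronously (is_async
 * == 0) and sent in a simple loop until blk_mig_save_dirty_block()
 * reports that no dirty data is left.
 */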
/* Called with iothread lock taken. */

static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* We know for sure that the bulk save is complete and that all
     * async reads have completed.
     */
    blk_mig_lock();
    assert(block_mig_state.submitted == 0);
    blk_mig_unlock();

    do {
        ret = blk_mig_save_dirty_block(f, 0);
        if (ret < 0) {
            return ret;
        }
    } while (ret == 0);

    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return 0;
}

static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                               uint64_t *non_postcopiable_pending,
                               uint64_t *postcopiable_pending)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending;

    qemu_mutex_lock_iothread();
    blk_mig_lock();
    pending = get_remaining_dirty() +
              block_mig_state.submitted * BLOCK_SIZE +
              block_mig_state.read_done * BLOCK_SIZE;

    /* Report at least one block pending during bulk phase */
    if (pending <= max_size && !block_mig_state.bulk_completed) {
        pending = max_size + BLOCK_SIZE;
    }
    blk_mig_unlock();
    qemu_mutex_unlock_iothread();

    DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
    /* We don't do postcopy */
    *non_postcopiable_pending += pending;
}
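/* Destination side: block_load() parses the stream produced by the save
 * handlers above. Each iteration reads one be64 header, decodes the
 * flags from its low bits, and then either writes a device chunk
 * (zero-filled via bdrv_write_zeroes() when BLK_MIG_FLAG_ZERO_BLOCK is
 * set), prints a progress line, or terminates on BLK_MIG_FLAG_EOS.
 */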
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    BlockBackend *blk;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            blk = blk_by_name(device_name);
            if (!blk) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }
            bs = blk_bs(blk);
            if (!bs) {
                fprintf(stderr, "Block device %s has no medium\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_nb_sectors(bs);
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
                ret = bdrv_write_zeroes(bs, addr, nr_sectors,
                                        BDRV_REQ_MAY_UNMAP);
            } else {
                buf = g_malloc(BLOCK_SIZE);
                qemu_get_buffer(f, buf, BLOCK_SIZE);
                ret = bdrv_write(bs, addr, buf, nr_sectors);
                g_free(buf);
            }

            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static void block_set_params(const MigrationParams *params, void *opaque)
{
    block_mig_state.blk_enable = params->blk;
    block_mig_state.shared_base = params->shared;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= params->shared;
}

static bool block_is_active(void *opaque)
{
    return block_mig_state.blk_enable == 1;
}

static SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete_precopy = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .cleanup = block_migration_cleanup,
    .is_active = block_is_active,
};

void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
    qemu_mutex_init(&block_mig_state.lock);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}