xref: /qemu/migration/migration.c (revision aef04fc7)
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "options.h"

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
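
/*
 * Bandwidth throttling works in BUFFER_DELAY-sized chunks: with
 * BUFFER_DELAY == 100 ms there are XFER_LIMIT_RATIO == 10 chunks per
 * second, so migrate_params_apply() programs the per-chunk limit as
 * max_bandwidth / XFER_LIMIT_RATIO to hit the requested bytes/second.
 */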

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count; decompression is usually at
 * least four times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0 means no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Default auto-converge CPU throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints (20 seconds) */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0 means no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0 means no compression, 1: best speed, ... 20: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy; 0 means unlimited.  Note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};
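
/*
 * On the wire each return-path message is a be16 message type, a be16
 * payload length, then the payload itself; see migrate_send_rp_message()
 * below.
 */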

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migrations. */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static bool migration_needs_multiple_sockets(void)
{
    return migrate_multifd() || migrate_postcopy_preempt();
}

static bool uri_supports_multi_channels(const char *uri)
{
    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
           strstart(uri, "vsock:", NULL);
}

static bool
migration_channels_and_uri_compatible(const char *uri, Error **errp)
{
    if (migration_needs_multiple_sockets() &&
        !uri_supports_multi_channels(uri)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    return true;
}

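/*
 * Three-way comparator for the page_requested GTree, keyed by host page
 * addresses: the (a > b) - (a < b) idiom yields -1, 0 or 1 without the
 * overflow a plain subtraction could suffer.
 */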
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Initialize the incoming migration object as well, whether or
     * not we'll end up using it.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);

    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be waiting
     * on a semaphore, so wake it up before shutting migration down.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_load_cleanup();

    if (mis->to_src_file) {
        /* Tell the source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_events()) {
        qapi_event_send_migration(new_state);
    }
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* The QEMU file may have hit an error during the send */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RB
 *   len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname len (1) + up to 255 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
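    /*
     * Resulting wire layout (all fields big-endian):
     *   bytes 0..7   requested start offset (be64)
     *   bytes 8..11  requested length (be32)
     *   byte  12     rbname length (MIG_RP_MSG_REQ_PAGES_ID only)
     *   bytes 13..   rbname, not NUL-terminated (ID messages only)
     */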

    /*
     * We track the last RAMBlock that we sent a page request for.  Note
     * that we don't need locking because this function is only called
     * from the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of the element to 1,
             * so that things like g_tree_lookup() will return TRUE (1)
             * when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock, because once the page has arrived it stays there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

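/*
 * Incoming URI dispatch.  The prefixes handled below are the usual QEMU
 * migration URI forms, e.g. "tcp:host:port", "unix:/path",
 * "vsock:cid:port", "rdma:host:port" (with CONFIG_RDMA), "exec:command"
 * and "fd:N".
 */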
static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    /* Is the URI suitable for the configured migration channels? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If the capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen, so we don't want it to
     * happen unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    multifd_load_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes, since as soon as an
     * external observer sees this event they might start to prod at
     * the VM assuming it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to
             * advise) but managed to complete within the precopy period,
             * we can use the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started; cleanup should happen at the end of
             * the postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* We have the COLO info now, so we know whether we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);

    multifd_load_cleanup();

    exit(EXIT_FAILURE);
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
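    /*
     * The incoming stream is consumed by a coroutine on the main loop;
     * non-blocking mode lets a read that would block yield back to the
     * main loop instead of stalling it.
     */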
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has a standalone thread to do the VM load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here we only wake up the main loading thread (the other
         * threads keep waiting), so that we can receive commands from
         * the source now and answer them if needed.  The remaining
         * threads are woken up later, once we are sure the source is
         * ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

/*
 * Returns true when we want to start a new incoming migration process,
 * false otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_multifd()) {
        return migration_has_all_channels();
    }

    /* The preempt channel only starts when the main channel is created */
    if (migrate_postcopy_preempt()) {
        return main_channel;
    }

    /*
     * For all other migration types, we should only reach here when
     * the main channel is being created, and we should always proceed
     * with it.
     */
    assert(main_channel);
    return true;
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    bool default_channel = true;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (migrate_multifd() && !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /*
         * With multiple channels, it is possible to receive them out of
         * order on the destination side, causing an incorrect mapping of
         * source channels on the destination.  Check the channel magic to
         * decide the channel type.  Note this is best effort: the postcopy
         * preempt channel does not send any magic number, so skip the
         * check for postcopy live migration.  TLS live migration already
         * performs the TLS handshake while initializing the main channel,
         * so this issue cannot occur with TLS.
         */
        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                          sizeof(channel_magic), &local_err);

        if (ret != 0) {
            error_propagate(errp, local_err);
            return;
        }

        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

    /* multifd_load_setup() already sets errp on failure */
    if (multifd_load_setup(errp) != 0) {
        return;
    }

    if (default_channel) {
        f = qemu_file_new_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_multifd()) {
            multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (migration_should_start_incoming(default_channel)) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-0 value indicates
 * an error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the length of
     * the idstr, followed by the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MIG_RP_MSG_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (the fault thread is still paused),
     * so it would be OK even without taking the mutex.  However, the
     * best approach is to take the lock before sending the message
     * header and release it after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;

    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;

    }
}
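
/*
 * Note the two predicates above differ only in their last case:
 * migration_is_setup_or_active() includes COLO but not CANCELLING,
 * while migration_is_running() includes CANCELLING but not COLO.
 */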

static bool migrate_show_downtime(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;

    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
    }

    if (migrate_show_downtime(s)) {
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = stat64_get(&ram_counters.transferred);
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&ram_counters.zero_pages);
    /* legacy value; it is no longer used */
    info->ram->skipped = 0;
    info->ram->normal = stat64_get(&ram_counters.normal_pages);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count =
        stat64_get(&ram_counters.dirty_sync_count);
    info->ram->dirty_sync_missed_zero_copy =
        stat64_get(&ram_counters.dirty_sync_missed_zero_copy);
    info->ram->postcopy_requests =
        stat64_get(&ram_counters.postcopy_requests);
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes);
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes);
    info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes);
    info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes);

    if (migrate_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_compress()) {
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
                                    compression_counters.compressed_size;
        info->compression->compression_rate =
                                    compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked:
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

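/*
 * For "query-migrate" the destination info is filled in first; the
 * source side then overwrites the status only when a migration has
 * actually been started there (fill_source_migration_info() returns
 * early on MIGRATION_STATUS_NONE), so a pure destination QEMU keeps
 * reporting its incoming state.
 */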
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

/*
 * Check whether the parameters are valid. On failure, an error is put
 * into errp (if provided). Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                    stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "a value between 0 and 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "a power of two no less than the target page size");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "a value between 0 and 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
        params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "a value between 1 and 10000");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

#ifdef CONFIG_LINUX
    if (migrate_zero_copy_send() &&
        ((params->has_multifd_compression && params->multifd_compression) ||
         (params->tls_creds && *params->tls_creds))) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#endif

    return true;
}

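/*
 * Note the @dest produced below is only a scratch copy for validation:
 * string fields such as tls_creds and tls_hostname are shallow
 * references into @params, so it must not be freed or kept once
 * @params is gone.
 */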
1304 static void migrate_params_test_apply(MigrateSetParameters *params,
1305                                       MigrationParameters *dest)
1306 {
1307     *dest = migrate_get_current()->parameters;
1308 
1309     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1310 
1311     if (params->has_compress_level) {
1312         dest->compress_level = params->compress_level;
1313     }
1314 
1315     if (params->has_compress_threads) {
1316         dest->compress_threads = params->compress_threads;
1317     }
1318 
1319     if (params->has_compress_wait_thread) {
1320         dest->compress_wait_thread = params->compress_wait_thread;
1321     }
1322 
1323     if (params->has_decompress_threads) {
1324         dest->decompress_threads = params->decompress_threads;
1325     }
1326 
1327     if (params->has_throttle_trigger_threshold) {
1328         dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
1329     }
1330 
1331     if (params->has_cpu_throttle_initial) {
1332         dest->cpu_throttle_initial = params->cpu_throttle_initial;
1333     }
1334 
1335     if (params->has_cpu_throttle_increment) {
1336         dest->cpu_throttle_increment = params->cpu_throttle_increment;
1337     }
1338 
1339     if (params->has_cpu_throttle_tailslow) {
1340         dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
1341     }
1342 
1343     if (params->tls_creds) {
1344         assert(params->tls_creds->type == QTYPE_QSTRING);
1345         dest->tls_creds = params->tls_creds->u.s;
1346     }
1347 
1348     if (params->tls_hostname) {
1349         assert(params->tls_hostname->type == QTYPE_QSTRING);
1350         dest->tls_hostname = params->tls_hostname->u.s;
1351     }
1352 
1353     if (params->has_max_bandwidth) {
1354         dest->max_bandwidth = params->max_bandwidth;
1355     }
1356 
1357     if (params->has_downtime_limit) {
1358         dest->downtime_limit = params->downtime_limit;
1359     }
1360 
1361     if (params->has_x_checkpoint_delay) {
1362         dest->x_checkpoint_delay = params->x_checkpoint_delay;
1363     }
1364 
1365     if (params->has_block_incremental) {
1366         dest->block_incremental = params->block_incremental;
1367     }
1368     if (params->has_multifd_channels) {
1369         dest->multifd_channels = params->multifd_channels;
1370     }
1371     if (params->has_multifd_compression) {
1372         dest->multifd_compression = params->multifd_compression;
1373     }
1374     if (params->has_xbzrle_cache_size) {
1375         dest->xbzrle_cache_size = params->xbzrle_cache_size;
1376     }
1377     if (params->has_max_postcopy_bandwidth) {
1378         dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1379     }
1380     if (params->has_max_cpu_throttle) {
1381         dest->max_cpu_throttle = params->max_cpu_throttle;
1382     }
1383     if (params->has_announce_initial) {
1384         dest->announce_initial = params->announce_initial;
1385     }
1386     if (params->has_announce_max) {
1387         dest->announce_max = params->announce_max;
1388     }
1389     if (params->has_announce_rounds) {
1390         dest->announce_rounds = params->announce_rounds;
1391     }
1392     if (params->has_announce_step) {
1393         dest->announce_step = params->announce_step;
1394     }
1395 
1396     if (params->has_block_bitmap_mapping) {
1397         dest->has_block_bitmap_mapping = true;
1398         dest->block_bitmap_mapping = params->block_bitmap_mapping;
1399     }
1400 }
1401 
1402 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
1403 {
1404     MigrationState *s = migrate_get_current();
1405 
1406     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1407 
1408     if (params->has_compress_level) {
1409         s->parameters.compress_level = params->compress_level;
1410     }
1411 
1412     if (params->has_compress_threads) {
1413         s->parameters.compress_threads = params->compress_threads;
1414     }
1415 
1416     if (params->has_compress_wait_thread) {
1417         s->parameters.compress_wait_thread = params->compress_wait_thread;
1418     }
1419 
1420     if (params->has_decompress_threads) {
1421         s->parameters.decompress_threads = params->decompress_threads;
1422     }
1423 
1424     if (params->has_throttle_trigger_threshold) {
1425         s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
1426     }
1427 
1428     if (params->has_cpu_throttle_initial) {
1429         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
1430     }
1431 
1432     if (params->has_cpu_throttle_increment) {
1433         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
1434     }
1435 
1436     if (params->has_cpu_throttle_tailslow) {
1437         s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
1438     }
1439 
1440     if (params->tls_creds) {
1441         g_free(s->parameters.tls_creds);
1442         assert(params->tls_creds->type == QTYPE_QSTRING);
1443         s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
1444     }
1445 
1446     if (params->tls_hostname) {
1447         g_free(s->parameters.tls_hostname);
1448         assert(params->tls_hostname->type == QTYPE_QSTRING);
1449         s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
1450     }
1451 
1452     if (params->tls_authz) {
1453         g_free(s->parameters.tls_authz);
1454         assert(params->tls_authz->type == QTYPE_QSTRING);
1455         s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
1456     }
1457 
1458     if (params->has_max_bandwidth) {
1459         s->parameters.max_bandwidth = params->max_bandwidth;
1460         if (s->to_dst_file && !migration_in_postcopy()) {
1461             qemu_file_set_rate_limit(s->to_dst_file,
1462                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
1463         }
1464     }
1465 
1466     if (params->has_downtime_limit) {
1467         s->parameters.downtime_limit = params->downtime_limit;
1468     }
1469 
1470     if (params->has_x_checkpoint_delay) {
1471         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
1472         if (migration_in_colo_state()) {
1473             colo_checkpoint_notify(s);
1474         }
1475     }
1476 
1477     if (params->has_block_incremental) {
1478         s->parameters.block_incremental = params->block_incremental;
1479     }
1480     if (params->has_multifd_channels) {
1481         s->parameters.multifd_channels = params->multifd_channels;
1482     }
1483     if (params->has_multifd_compression) {
1484         s->parameters.multifd_compression = params->multifd_compression;
1485     }
1486     if (params->has_xbzrle_cache_size) {
1487         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
1488         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
1489     }
1490     if (params->has_max_postcopy_bandwidth) {
1491         s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1492         if (s->to_dst_file && migration_in_postcopy()) {
1493             qemu_file_set_rate_limit(s->to_dst_file,
1494                     s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
1495         }
1496     }
1497     if (params->has_max_cpu_throttle) {
1498         s->parameters.max_cpu_throttle = params->max_cpu_throttle;
1499     }
1500     if (params->has_announce_initial) {
1501         s->parameters.announce_initial = params->announce_initial;
1502     }
1503     if (params->has_announce_max) {
1504         s->parameters.announce_max = params->announce_max;
1505     }
1506     if (params->has_announce_rounds) {
1507         s->parameters.announce_rounds = params->announce_rounds;
1508     }
1509     if (params->has_announce_step) {
1510         s->parameters.announce_step = params->announce_step;
1511     }
1512 
1513     if (params->has_block_bitmap_mapping) {
1514         qapi_free_BitmapMigrationNodeAliasList(
1515             s->parameters.block_bitmap_mapping);
1516 
1517         s->parameters.has_block_bitmap_mapping = true;
1518         s->parameters.block_bitmap_mapping =
1519             QAPI_CLONE(BitmapMigrationNodeAliasList,
1520                        params->block_bitmap_mapping);
1521     }
1522 }
1523 
1524 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1525 {
1526     MigrationParameters tmp;
1527 
1528     /* TODO Rewrite "" to null instead */
1529     if (params->tls_creds
1530         && params->tls_creds->type == QTYPE_QNULL) {
1531         qobject_unref(params->tls_creds->u.n);
1532         params->tls_creds->type = QTYPE_QSTRING;
1533         params->tls_creds->u.s = strdup("");
1534     }
1535     /* TODO Rewrite "" to null instead */
1536     if (params->tls_hostname
1537         && params->tls_hostname->type == QTYPE_QNULL) {
1538         qobject_unref(params->tls_hostname->u.n);
1539         params->tls_hostname->type = QTYPE_QSTRING;
1540         params->tls_hostname->u.s = strdup("");
1541     }
1542 
1543     migrate_params_test_apply(params, &tmp);
1544 
1545     if (!migrate_params_check(&tmp, errp)) {
1546         /* Invalid parameter */
1547         return;
1548     }
1549 
1550     migrate_params_apply(params, errp);
1551 }
1552 
1553 
1554 void qmp_migrate_start_postcopy(Error **errp)
1555 {
1556     MigrationState *s = migrate_get_current();
1557 
1558     if (!migrate_postcopy()) {
1559         error_setg(errp, "Enable postcopy with migrate_set_capability before"
1560                          " the start of migration");
1561         return;
1562     }
1563 
1564     if (s->state == MIGRATION_STATUS_NONE) {
1565         error_setg(errp, "Postcopy must be started after migration has been"
1566                          " started");
1567         return;
1568     }
1569     /*
1570      * We don't error if migration has already finished, since that
1571      * would be racy with issuing this command.
1572      */
1573     qatomic_set(&s->start_postcopy, true);
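    /*
     * This only requests the switchover: the migration thread polls
     * start_postcopy (see migration_iteration_run() below) and performs
     * the actual switch once the remaining precopy data fits within the
     * threshold.
     */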
1574 }
1575 
1576 /* shared migration helpers */
1577 
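/*
 * migrate_set_state() transitions with an atomic compare-and-swap: the
 * change only happens if *state still equals old_state, so concurrent
 * updaters cannot clobber each other's transitions, and the trace point
 * and QMP event fire exactly once, from whichever caller wins the race.
 */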
1578 void migrate_set_state(int *state, int old_state, int new_state)
1579 {
1580     assert(new_state < MIGRATION_STATUS__MAX);
1581     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
1582         trace_migrate_set_state(MigrationStatus_str(new_state));
1583         migrate_generate_event(new_state);
1584     }
1585 }
1586 
1587 static void migrate_set_block_incremental(MigrationState *s, bool value)
1588 {
1589     s->parameters.block_incremental = value;
1590 }
1591 
1592 static void block_cleanup_parameters(MigrationState *s)
1593 {
1594     if (s->must_remove_block_options) {
1595         /* setting to false can never fail */
1596         migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort);
1597         migrate_set_block_incremental(s, false);
1598         s->must_remove_block_options = false;
1599     }
1600 }
1601 
1602 static void migrate_fd_cleanup(MigrationState *s)
1603 {
1604     qemu_bh_delete(s->cleanup_bh);
1605     s->cleanup_bh = NULL;
1606 
1607     g_free(s->hostname);
1608     s->hostname = NULL;
1609     json_writer_free(s->vmdesc);
1610     s->vmdesc = NULL;
1611 
1612     qemu_savevm_state_cleanup();
1613 
1614     if (s->to_dst_file) {
1615         QEMUFile *tmp;
1616 
1617         trace_migrate_fd_cleanup();
1618         qemu_mutex_unlock_iothread();
1619         if (s->migration_thread_running) {
1620             qemu_thread_join(&s->thread);
1621             s->migration_thread_running = false;
1622         }
1623         qemu_mutex_lock_iothread();
1624 
1625         multifd_save_cleanup();
1626         qemu_mutex_lock(&s->qemu_file_lock);
1627         tmp = s->to_dst_file;
1628         s->to_dst_file = NULL;
1629         qemu_mutex_unlock(&s->qemu_file_lock);
1630         /*
1631          * Close the file handle without the lock to make sure the
1632          * critical section won't block for long.
1633          */
1634         migration_ioc_unregister_yank_from_file(tmp);
1635         qemu_fclose(tmp);
1636     }
1637 
1638     if (s->postcopy_qemufile_src) {
1639         migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
1640         qemu_fclose(s->postcopy_qemufile_src);
1641         s->postcopy_qemufile_src = NULL;
1642     }
1643 
1644     assert(!migration_is_active(s));
1645 
1646     if (s->state == MIGRATION_STATUS_CANCELLING) {
1647         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
1648                           MIGRATION_STATUS_CANCELLED);
1649     }
1650 
1651     if (s->error) {
1652         /* It is used by "info migrate".  We can't free it. */
1653         error_report_err(error_copy(s->error));
1654     }
1655     notifier_list_notify(&migration_state_notifiers, s);
1656     block_cleanup_parameters(s);
1657     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
1658 }
1659 
1660 static void migrate_fd_cleanup_schedule(MigrationState *s)
1661 {
1662     /*
1663      * Take a ref on the state for the bh, because it may run when
1664      * there are already no other refs.
1665      */
1666     object_ref(OBJECT(s));
1667     qemu_bh_schedule(s->cleanup_bh);
1668 }
1669 
1670 static void migrate_fd_cleanup_bh(void *opaque)
1671 {
1672     MigrationState *s = opaque;
1673     migrate_fd_cleanup(s);
1674     object_unref(OBJECT(s));
1675 }
1676 
1677 void migrate_set_error(MigrationState *s, const Error *error)
1678 {
1679     QEMU_LOCK_GUARD(&s->error_mutex);
1680     if (!s->error) {
1681         s->error = error_copy(error);
1682     }
1683 }
1684 
1685 static void migrate_error_free(MigrationState *s)
1686 {
1687     QEMU_LOCK_GUARD(&s->error_mutex);
1688     if (s->error) {
1689         error_free(s->error);
1690         s->error = NULL;
1691     }
1692 }
1693 
1694 void migrate_fd_error(MigrationState *s, const Error *error)
1695 {
1696     trace_migrate_fd_error(error_get_pretty(error));
1697     assert(s->to_dst_file == NULL);
1698     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1699                       MIGRATION_STATUS_FAILED);
1700     migrate_set_error(s, error);
1701 }
1702 
1703 static void migrate_fd_cancel(MigrationState *s)
1704 {
1705     int old_state;
1706     QEMUFile *f = migrate_get_current()->to_dst_file;
1707     trace_migrate_fd_cancel();
1708 
1709     WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
1710         if (s->rp_state.from_dst_file) {
1711             /* shut down the rp socket, causing the rp thread to exit */
1712             qemu_file_shutdown(s->rp_state.from_dst_file);
1713         }
1714     }
1715 
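    /*
     * The state may be changed concurrently by the migration thread, so
     * retry the cmpxchg-based transition until we either observe a state
     * that is no longer "running" or have successfully moved it to
     * CANCELLING.
     */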
1716     do {
1717         old_state = s->state;
1718         if (!migration_is_running(old_state)) {
1719             break;
1720         }
1721         /* If the migration is paused, kick it out of the pause */
1722         if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
1723             qemu_sem_post(&s->pause_sem);
1724         }
1725         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
1726     } while (s->state != MIGRATION_STATUS_CANCELLING);
1727 
1728     /*
1729      * If we're unlucky the migration code might be stuck somewhere in a
1730      * send/write while the network has failed and is waiting to timeout;
1731      * if we've got shutdown(2) available then we can force it to quit.
1732      * The outgoing qemu file gets closed in migrate_fd_cleanup that is
1733      * called in a bh, so there is no race against this cancel.
1734      */
1735     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
1736         qemu_file_shutdown(f);
1737     }
1738     if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
1739         Error *local_err = NULL;
1740 
1741         bdrv_activate_all(&local_err);
1742         if (local_err) {
1743             error_report_err(local_err);
1744         } else {
1745             s->block_inactive = false;
1746         }
1747     }
1748 }
1749 
1750 void add_migration_state_change_notifier(Notifier *notify)
1751 {
1752     notifier_list_add(&migration_state_notifiers, notify);
1753 }
1754 
1755 void remove_migration_state_change_notifier(Notifier *notify)
1756 {
1757     notifier_remove(notify);
1758 }
1759 
1760 bool migration_in_setup(MigrationState *s)
1761 {
1762     return s->state == MIGRATION_STATUS_SETUP;
1763 }
1764 
1765 bool migration_has_finished(MigrationState *s)
1766 {
1767     return s->state == MIGRATION_STATUS_COMPLETED;
1768 }
1769 
1770 bool migration_has_failed(MigrationState *s)
1771 {
1772     return (s->state == MIGRATION_STATUS_CANCELLED ||
1773             s->state == MIGRATION_STATUS_FAILED);
1774 }
1775 
1776 bool migration_in_postcopy(void)
1777 {
1778     MigrationState *s = migrate_get_current();
1779 
1780     switch (s->state) {
1781     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1782     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1783     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1784         return true;
1785     default:
1786         return false;
1787     }
1788 }
1789 
1790 bool migration_in_postcopy_after_devices(MigrationState *s)
1791 {
1792     return migration_in_postcopy() && s->postcopy_after_devices;
1793 }
1794 
1795 bool migration_in_incoming_postcopy(void)
1796 {
1797     PostcopyState ps = postcopy_state_get();
1798 
1799     return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
1800 }
1801 
1802 bool migration_incoming_postcopy_advised(void)
1803 {
1804     PostcopyState ps = postcopy_state_get();
1805 
1806     return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
1807 }
1808 
1809 bool migration_in_bg_snapshot(void)
1810 {
1811     MigrationState *s = migrate_get_current();
1812 
1813     return migrate_background_snapshot() &&
1814             migration_is_setup_or_active(s->state);
1815 }
1816 
1817 bool migration_is_idle(void)
1818 {
1819     MigrationState *s = current_migration;
1820 
1821     if (!s) {
1822         return true;
1823     }
1824 
1825     switch (s->state) {
1826     case MIGRATION_STATUS_NONE:
1827     case MIGRATION_STATUS_CANCELLED:
1828     case MIGRATION_STATUS_COMPLETED:
1829     case MIGRATION_STATUS_FAILED:
1830         return true;
1831     case MIGRATION_STATUS_SETUP:
1832     case MIGRATION_STATUS_CANCELLING:
1833     case MIGRATION_STATUS_ACTIVE:
1834     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1835     case MIGRATION_STATUS_COLO:
1836     case MIGRATION_STATUS_PRE_SWITCHOVER:
1837     case MIGRATION_STATUS_DEVICE:
1838     case MIGRATION_STATUS_WAIT_UNPLUG:
1839         return false;
1840     case MIGRATION_STATUS__MAX:
1841         g_assert_not_reached();
1842     }
1843 
1844     return false;
1845 }
1846 
1847 bool migration_is_active(MigrationState *s)
1848 {
1849     return (s->state == MIGRATION_STATUS_ACTIVE ||
1850             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1851 }
1852 
1853 void migrate_init(MigrationState *s)
1854 {
1855     /*
1856      * Reinitialise all migration state, except
1857      * parameters/capabilities that the user set, and
1858      * locks.
1859      */
1860     s->cleanup_bh = 0;
1861     s->vm_start_bh = 0;
1862     s->to_dst_file = NULL;
1863     s->state = MIGRATION_STATUS_NONE;
1864     s->rp_state.from_dst_file = NULL;
1865     s->rp_state.error = false;
1866     s->mbps = 0.0;
1867     s->pages_per_second = 0.0;
1868     s->downtime = 0;
1869     s->expected_downtime = 0;
1870     s->setup_time = 0;
1871     s->start_postcopy = false;
1872     s->postcopy_after_devices = false;
1873     s->migration_thread_running = false;
1874     error_free(s->error);
1875     s->error = NULL;
1876     s->hostname = NULL;
1877 
1878     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1879 
1880     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1881     s->total_time = 0;
1882     s->vm_was_running = false;
1883     s->iteration_initial_bytes = 0;
1884     s->threshold_size = 0;
1885 }
1886 
1887 int migrate_add_blocker_internal(Error *reason, Error **errp)
1888 {
1889     /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
1890     if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
1891         error_propagate_prepend(errp, error_copy(reason),
1892                                 "disallowing migration blocker "
1893                                 "(migration/snapshot in progress) for: ");
1894         return -EBUSY;
1895     }
1896 
1897     migration_blockers = g_slist_prepend(migration_blockers, reason);
1898     return 0;
1899 }
1900 
1901 int migrate_add_blocker(Error *reason, Error **errp)
1902 {
1903     if (only_migratable) {
1904         error_propagate_prepend(errp, error_copy(reason),
1905                                 "disallowing migration blocker "
1906                                 "(--only-migratable) for: ");
1907         return -EACCES;
1908     }
1909 
1910     return migrate_add_blocker_internal(reason, errp);
1911 }
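
/*
 * An illustrative caller pattern (hypothetical device, not taken from
 * this file):
 *
 *     Error *blocker = NULL;
 *     error_setg(&blocker, "mydev: device does not support migration");
 *     if (migrate_add_blocker(blocker, errp) < 0) {
 *         error_free(blocker);
 *     }
 *
 * If registration fails, the blocker was not stored, so the caller still
 * owns it and must free it, as above; a stored blocker is dropped again
 * later with migrate_del_blocker(blocker).
 */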
1912 
1913 void migrate_del_blocker(Error *reason)
1914 {
1915     migration_blockers = g_slist_remove(migration_blockers, reason);
1916 }
1917 
1918 void qmp_migrate_incoming(const char *uri, Error **errp)
1919 {
1920     Error *local_err = NULL;
1921     static bool once = true;
1922 
1923     if (!once) {
1924         error_setg(errp, "The incoming migration has already been started");
1925         return;
1926     }
1927     if (!runstate_check(RUN_STATE_INMIGRATE)) {
1928         error_setg(errp, "'-incoming' was not specified on the command line");
1929         return;
1930     }
1931 
1932     if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
1933         return;
1934     }
1935 
1936     qemu_start_incoming_migration(uri, &local_err);
1937 
1938     if (local_err) {
1939         yank_unregister_instance(MIGRATION_YANK_INSTANCE);
1940         error_propagate(errp, local_err);
1941         return;
1942     }
1943 
1944     once = false;
1945 }
1946 
1947 void qmp_migrate_recover(const char *uri, Error **errp)
1948 {
1949     MigrationIncomingState *mis = migration_incoming_get_current();
1950 
1951     /*
1952      * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
1953      * Don't even bother to use ERRP_GUARD(): errp _must_ always be set by
1954      * callers (no one should ignore a recover failure); if one does, it's a
1955      * programming error.
1956     assert(errp);
1957 
1958     if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1959         error_setg(errp, "Migrate recover can only be run "
1960                    "when postcopy is paused.");
1961         return;
1962     }
1963 
1964     /* If there's an existing transport, release it */
1965     migration_incoming_transport_cleanup(mis);
1966 
1967     /*
1968      * Note that this call will never start a real migration; it will
1969      * only re-setup the migration stream and poke existing migration
1970      * to continue using that newly established channel.
1971      */
1972     qemu_start_incoming_migration(uri, errp);
1973 }
1974 
1975 void qmp_migrate_pause(Error **errp)
1976 {
1977     MigrationState *ms = migrate_get_current();
1978     MigrationIncomingState *mis = migration_incoming_get_current();
1979     int ret;
1980 
1981     if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1982         /* Source side, during postcopy */
1983         qemu_mutex_lock(&ms->qemu_file_lock);
1984         ret = qemu_file_shutdown(ms->to_dst_file);
1985         qemu_mutex_unlock(&ms->qemu_file_lock);
1986         if (ret) {
1987             error_setg(errp, "Failed to pause source migration");
1988         }
1989         return;
1990     }
1991 
1992     if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1993         ret = qemu_file_shutdown(mis->from_src_file);
1994         if (ret) {
1995             error_setg(errp, "Failed to pause destination migration");
1996         }
1997         return;
1998     }
1999 
2000     error_setg(errp, "migrate-pause is currently only supported "
2001                "during postcopy-active state");
2002 }
2003 
2004 bool migration_is_blocked(Error **errp)
2005 {
2006     if (qemu_savevm_state_blocked(errp)) {
2007         return true;
2008     }
2009 
2010     if (migration_blockers) {
2011         error_propagate(errp, error_copy(migration_blockers->data));
2012         return true;
2013     }
2014 
2015     return false;
2016 }
2017 
2018 /* Returns true if we should continue to migrate, or false if an error was detected */
2019 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
2020                             bool resume, Error **errp)
2021 {
2022     Error *local_err = NULL;
2023 
2024     if (resume) {
2025         if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2026             error_setg(errp, "Cannot resume if there is no "
2027                        "paused migration");
2028             return false;
2029         }
2030 
2031         /*
2032          * capability, since release-ram drops the page buffer as soon
2033          * as the page is put into the send buffer.  So if a network
2034          * failure happens, any page buffers that have not yet reached
2035          * the destination VM but have already been sent from the source
2036          * VM will be lost forever.  Refuse to let the client resume
2037          * such a postcopy migration.
2038          * the client from resuming such a postcopy migration.
2039          * Luckily release-ram was designed to only be used when src
2040          * and destination VMs are on the same host, so it should be
2041          * fine.
2042          */
2043         if (migrate_release_ram()) {
2044             error_setg(errp, "Postcopy recovery cannot work "
2045                        "when release-ram capability is set");
2046             return false;
2047         }
2048 
2049         /* This is a resume, skip init status */
2050         return true;
2051     }
2052 
2053     if (migration_is_running(s->state)) {
2054         error_setg(errp, QERR_MIGRATION_ACTIVE);
2055         return false;
2056     }
2057 
2058     if (runstate_check(RUN_STATE_INMIGRATE)) {
2059         error_setg(errp, "Guest is waiting for an incoming migration");
2060         return false;
2061     }
2062 
2063     if (runstate_check(RUN_STATE_POSTMIGRATE)) {
2064         error_setg(errp, "Can't migrate the vm that was paused due to "
2065                    "previous migration");
2066         return false;
2067     }
2068 
2069     if (migration_is_blocked(errp)) {
2070         return false;
2071     }
2072 
2073     if (blk || blk_inc) {
2074         if (migrate_colo()) {
2075             error_setg(errp, "No disk migration is required in COLO mode");
2076             return false;
2077         }
2078         if (migrate_block() || migrate_block_incremental()) {
2079             error_setg(errp, "Command options are incompatible with "
2080                        "current migration capabilities");
2081             return false;
2082         }
2083         if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) {
2084             error_propagate(errp, local_err);
2085             return false;
2086         }
2087         s->must_remove_block_options = true;
2088     }
2089 
2090     if (blk_inc) {
2091         migrate_set_block_incremental(s, true);
2092     }
2093 
2094     migrate_init(s);
2095     /*
2096      * Zero the ram_counters and compression_counters memory for a
2097      * new migration.
2098      */
2099     memset(&ram_counters, 0, sizeof(ram_counters));
2100     memset(&compression_counters, 0, sizeof(compression_counters));
2101 
2102     return true;
2103 }
2104 
2105 void qmp_migrate(const char *uri, bool has_blk, bool blk,
2106                  bool has_inc, bool inc, bool has_detach, bool detach,
2107                  bool has_resume, bool resume, Error **errp)
2108 {
2109     Error *local_err = NULL;
2110     MigrationState *s = migrate_get_current();
2111     const char *p = NULL;
2112 
2113     /* Is the URI suitable for migration? */
2114     if (!migration_channels_and_uri_compatible(uri, errp)) {
2115         return;
2116     }
2117 
2118     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
2119                          has_resume && resume, errp)) {
2120         /* Error detected, put into errp */
2121         return;
2122     }
2123 
2124     if (!(has_resume && resume)) {
2125         if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2126             return;
2127         }
2128     }
2129 
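    /*
     * Dispatch on the URI scheme.  The transports handled here are
     * "tcp:", "unix:" and "vsock:" (sockets), "rdma:" (only when built
     * with CONFIG_RDMA), "exec:" (a spawned command) and "fd:" (a
     * pre-opened file descriptor); anything else is rejected below.
     */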
2130     if (strstart(uri, "tcp:", &p) ||
2131         strstart(uri, "unix:", NULL) ||
2132         strstart(uri, "vsock:", NULL)) {
2133         socket_start_outgoing_migration(s, p ? p : uri, &local_err);
2134 #ifdef CONFIG_RDMA
2135     } else if (strstart(uri, "rdma:", &p)) {
2136         rdma_start_outgoing_migration(s, p, &local_err);
2137 #endif
2138     } else if (strstart(uri, "exec:", &p)) {
2139         exec_start_outgoing_migration(s, p, &local_err);
2140     } else if (strstart(uri, "fd:", &p)) {
2141         fd_start_outgoing_migration(s, p, &local_err);
2142     } else {
2143         if (!(has_resume && resume)) {
2144             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2145         }
2146         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
2147                    "a valid migration protocol");
2148         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2149                           MIGRATION_STATUS_FAILED);
2150         block_cleanup_parameters(s);
2151         return;
2152     }
2153 
2154     if (local_err) {
2155         if (!(has_resume && resume)) {
2156             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2157         }
2158         migrate_fd_error(s, local_err);
2159         error_propagate(errp, local_err);
2160         return;
2161     }
2162 }
2163 
2164 void qmp_migrate_cancel(Error **errp)
2165 {
2166     migration_cancel(NULL);
2167 }
2168 
2169 void qmp_migrate_continue(MigrationStatus state, Error **errp)
2170 {
2171     MigrationState *s = migrate_get_current();
2172     if (s->state != state) {
2173         error_setg(errp,  "Migration not in expected state: %s",
2174                    MigrationStatus_str(s->state));
2175         return;
2176     }
2177     qemu_sem_post(&s->pause_sem);
2178 }
2179 
2180 int migrate_use_tls(void)
2181 {
2182     MigrationState *s;
2183 
2184     s = migrate_get_current();
2185 
2186     return s->parameters.tls_creds && *s->parameters.tls_creds;
2187 }
2188 
2189 /* migration thread support */
2190 /*
2191  * Something bad happened to the RP stream; mark an error.
2192  * The caller shall print or trace something to indicate why.
2193  */
2194 static void mark_source_rp_bad(MigrationState *s)
2195 {
2196     s->rp_state.error = true;
2197 }
2198 
2199 static struct rp_cmd_args {
2200     ssize_t     len; /* -1 = variable */
2201     const char *name;
2202 } rp_cmd_args[] = {
2203     [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
2204     [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
2205     [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
2206     [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
2207     [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
2208     [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
2209     [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
2210     [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
2211 };
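
/*
 * On the wire, each return-path message is a 4-byte header (a big-endian
 * 16-bit message type followed by a big-endian 16-bit payload length)
 * and then the payload itself.  The table above gives the expected
 * payload length per type, with -1 meaning variable length; the reader
 * below validates both header fields before consuming the payload.
 */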
2212 
2213 /*
2214  * Process a request for pages received on the return path.
2215  * We're allowed to send more than requested (e.g. to round to our page size)
2216  * and we don't need to send pages that have already been sent.
2217  */
2218 static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname,
2219                                         ram_addr_t start, size_t len)
2220 {
2221     long our_host_ps = qemu_real_host_page_size();
2222 
2223     trace_migrate_handle_rp_req_pages(rbname, start, len);
2224 
2225     /*
2226      * Since we currently insist on matching page sizes, just sanity check
2227      * we're being asked for whole host pages.
2228      */
2229     if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
2230         !QEMU_IS_ALIGNED(len, our_host_ps)) {
2231         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
2232                      " len: %zd", __func__, start, len);
2233         mark_source_rp_bad(ms);
2234         return;
2235     }
2236 
2237     if (ram_save_queue_pages(rbname, start, len)) {
2238         mark_source_rp_bad(ms);
2239     }
2240 }
2241 
2242 /* Return true to retry, false to quit */
2243 static bool postcopy_pause_return_path_thread(MigrationState *s)
2244 {
2245     trace_postcopy_pause_return_path();
2246 
2247     qemu_sem_wait(&s->postcopy_pause_rp_sem);
2248 
2249     trace_postcopy_pause_return_path_continued();
2250 
2251     return true;
2252 }
2253 
2254 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
2255 {
2256     RAMBlock *block = qemu_ram_block_by_name(block_name);
2257 
2258     if (!block) {
2259         error_report("%s: invalid block name '%s'", __func__, block_name);
2260         return -EINVAL;
2261     }
2262 
2263     /* Fetch the received bitmap and refresh the dirty bitmap */
2264     return ram_dirty_bitmap_reload(s, block);
2265 }
2266 
2267 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
2268 {
2269     trace_source_return_path_thread_resume_ack(value);
2270 
2271     if (value != MIGRATION_RESUME_ACK_VALUE) {
2272         error_report("%s: illegal resume_ack value %"PRIu32,
2273                      __func__, value);
2274         return -1;
2275     }
2276 
2277     /* Now both sides are active. */
2278     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2279                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2280 
2281     /* Notify the send thread that it's time to continue sending pages */
2282     qemu_sem_post(&s->rp_state.rp_sem);
2283 
2284     return 0;
2285 }
2286 
2287 /*
2288  * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if
2289  * existed) in a safe way.
2290  */
2291 static void migration_release_dst_files(MigrationState *ms)
2292 {
2293     QEMUFile *file;
2294 
2295     WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
2296         /*
2297          * Reset the from_dst_file pointer first before releasing it, as we
2298          * can't block within the lock section.
2299          */
2300         file = ms->rp_state.from_dst_file;
2301         ms->rp_state.from_dst_file = NULL;
2302     }
2303 
2304     /*
2305      * Do the same to the postcopy fast path socket too, if there is
2306      * one.  No locking needed because this qemufile should only be
2307      * managed by the return path thread.
2308      */
2309     if (ms->postcopy_qemufile_src) {
2310         migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
2311         qemu_file_shutdown(ms->postcopy_qemufile_src);
2312         qemu_fclose(ms->postcopy_qemufile_src);
2313         ms->postcopy_qemufile_src = NULL;
2314     }
2315 
2316     qemu_fclose(file);
2317 }
2318 
2319 /*
2320  * Handles messages sent on the return path towards the source VM
2321  *
2322  */
2323 static void *source_return_path_thread(void *opaque)
2324 {
2325     MigrationState *ms = opaque;
2326     QEMUFile *rp = ms->rp_state.from_dst_file;
2327     uint16_t header_len, header_type;
2328     uint8_t buf[512];
2329     uint32_t tmp32, sibling_error;
2330     ram_addr_t start = 0; /* =0 to silence warning */
2331     size_t len = 0, expected_len;
2332     int res;
2333 
2334     trace_source_return_path_thread_entry();
2335     rcu_register_thread();
2336 
2337 retry:
2338     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
2339            migration_is_setup_or_active(ms->state)) {
2340         trace_source_return_path_thread_loop_top();
2341         header_type = qemu_get_be16(rp);
2342         header_len = qemu_get_be16(rp);
2343 
2344         if (qemu_file_get_error(rp)) {
2345             mark_source_rp_bad(ms);
2346             goto out;
2347         }
2348 
2349         if (header_type >= MIG_RP_MSG_MAX ||
2350             header_type == MIG_RP_MSG_INVALID) {
2351             error_report("RP: Received invalid message 0x%04x length 0x%04x",
2352                          header_type, header_len);
2353             mark_source_rp_bad(ms);
2354             goto out;
2355         }
2356 
2357         if ((rp_cmd_args[header_type].len != -1 &&
2358             header_len != rp_cmd_args[header_type].len) ||
2359             header_len > sizeof(buf)) {
2360             error_report("RP: Received '%s' message (0x%04x) with "
2361                          "incorrect length %d expecting %zu",
2362                          rp_cmd_args[header_type].name, header_type, header_len,
2363                          (size_t)rp_cmd_args[header_type].len);
2364             mark_source_rp_bad(ms);
2365             goto out;
2366         }
2367 
2368         /* We know we've got a valid header by this point */
2369         res = qemu_get_buffer(rp, buf, header_len);
2370         if (res != header_len) {
2371             error_report("RP: Failed reading data for message 0x%04x"
2372                          " read %d expected %d",
2373                          header_type, res, header_len);
2374             mark_source_rp_bad(ms);
2375             goto out;
2376         }
2377 
2378         /* OK, we have the message and the data */
2379         switch (header_type) {
2380         case MIG_RP_MSG_SHUT:
2381             sibling_error = ldl_be_p(buf);
2382             trace_source_return_path_thread_shut(sibling_error);
2383             if (sibling_error) {
2384                 error_report("RP: Sibling indicated error %d", sibling_error);
2385                 mark_source_rp_bad(ms);
2386             }
2387             /*
2388              * We'll let the main thread deal with closing the RP;
2389              * we could do a shutdown(2) on it, but we're the only user
2390              * anyway, so there's nothing gained.
2391              */
2392             goto out;
2393 
2394         case MIG_RP_MSG_PONG:
2395             tmp32 = ldl_be_p(buf);
2396             trace_source_return_path_thread_pong(tmp32);
2397             qemu_sem_post(&ms->rp_state.rp_pong_acks);
2398             break;
2399 
2400         case MIG_RP_MSG_REQ_PAGES:
2401             start = ldq_be_p(buf);
2402             len = ldl_be_p(buf + 8);
2403             migrate_handle_rp_req_pages(ms, NULL, start, len);
2404             break;
2405 
2406         case MIG_RP_MSG_REQ_PAGES_ID:
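            /*
             * Payload layout: an 8-byte start address, a 4-byte length,
             * a 1-byte idstr length, and then the idstr itself, which
             * the code below NUL-terminates in place.
             */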
2407             expected_len = 12 + 1; /* header + termination */
2408 
2409             if (header_len >= expected_len) {
2410                 start = ldq_be_p(buf);
2411                 len = ldl_be_p(buf + 8);
2412                 /* Now we expect an idstr */
2413                 tmp32 = buf[12]; /* Length of the following idstr */
2414                 buf[13 + tmp32] = '\0';
2415                 expected_len += tmp32;
2416             }
2417             if (header_len != expected_len) {
2418                 error_report("RP: Req_Page_id with length %d expecting %zd",
2419                              header_len, expected_len);
2420                 mark_source_rp_bad(ms);
2421                 goto out;
2422             }
2423             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
2424             break;
2425 
2426         case MIG_RP_MSG_RECV_BITMAP:
2427             if (header_len < 1) {
2428                 error_report("%s: missing block name", __func__);
2429                 mark_source_rp_bad(ms);
2430                 goto out;
2431             }
2432             /* Format: len (1B) + idstr (<255B). This ends the idstr. */
2433             buf[buf[0] + 1] = '\0';
2434             if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
2435                 mark_source_rp_bad(ms);
2436                 goto out;
2437             }
2438             break;
2439 
2440         case MIG_RP_MSG_RESUME_ACK:
2441             tmp32 = ldl_be_p(buf);
2442             if (migrate_handle_rp_resume_ack(ms, tmp32)) {
2443                 mark_source_rp_bad(ms);
2444                 goto out;
2445             }
2446             break;
2447 
2448         default:
2449             break;
2450         }
2451     }
2452 
2453 out:
2454     res = qemu_file_get_error(rp);
2455     if (res) {
2456         if (migration_in_postcopy()) {
2457             /*
2458              * Maybe there is something we can do: it looks like a
2459              * network down issue, and we pause for a recovery.
2460              */
2461             migration_release_dst_files(ms);
2462             rp = NULL;
2463             if (postcopy_pause_return_path_thread(ms)) {
2464                 /*
2465                  * Reload rp, reset the rest.  Referencing it is safe since
2466                  * it's reset only by us above, or when migration completes
2467                  */
2468                 rp = ms->rp_state.from_dst_file;
2469                 ms->rp_state.error = false;
2470                 goto retry;
2471             }
2472         }
2473 
2474         trace_source_return_path_thread_bad_end();
2475         mark_source_rp_bad(ms);
2476     }
2477 
2478     trace_source_return_path_thread_end();
2479     migration_release_dst_files(ms);
2480     rcu_unregister_thread();
2481     return NULL;
2482 }
2483 
2484 static int open_return_path_on_source(MigrationState *ms,
2485                                       bool create_thread)
2486 {
2487     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
2488     if (!ms->rp_state.from_dst_file) {
2489         return -1;
2490     }
2491 
2492     trace_open_return_path_on_source();
2493 
2494     if (!create_thread) {
2495         /* We're done */
2496         return 0;
2497     }
2498 
2499     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
2500                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
2501     ms->rp_state.rp_thread_created = true;
2502 
2503     trace_open_return_path_on_source_continue();
2504 
2505     return 0;
2506 }
2507 
2508 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
2509 static int await_return_path_close_on_source(MigrationState *ms)
2510 {
2511     /*
2512      * If this is a normal exit then the destination will send a SHUT and the
2513      * If this is a normal exit then the destination will send a SHUT and
2514      * the rp_thread will exit.  However, if there's an error we need to
2515      * cause it to exit.
2516     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
2517         /*
2518          * shutdown(2), if we have it, will cause it to unblock if it's stuck
2519          * waiting for the destination.
2520          */
2521         qemu_file_shutdown(ms->rp_state.from_dst_file);
2522         mark_source_rp_bad(ms);
2523     }
2524     trace_await_return_path_close_on_source_joining();
2525     qemu_thread_join(&ms->rp_state.rp_thread);
2526     ms->rp_state.rp_thread_created = false;
2527     trace_await_return_path_close_on_source_close();
2528     return ms->rp_state.error;
2529 }
2530 
2531 static inline void
2532 migration_wait_main_channel(MigrationState *ms)
2533 {
2534     /* Wait until one PONG message is received */
2535     qemu_sem_wait(&ms->rp_state.rp_pong_acks);
2536 }
2537 
2538 /*
2539  * Switch from normal iteration to postcopy
2540  * Returns non-0 on error
2541  */
2542 static int postcopy_start(MigrationState *ms)
2543 {
2544     int ret;
2545     QIOChannelBuffer *bioc;
2546     QEMUFile *fb;
2547     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2548     int64_t bandwidth = migrate_max_postcopy_bandwidth();
2549     bool restart_block = false;
2550     int cur_state = MIGRATION_STATUS_ACTIVE;
2551 
2552     if (migrate_postcopy_preempt()) {
2553         migration_wait_main_channel(ms);
2554         if (postcopy_preempt_establish_channel(ms)) {
2555             migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
2556             return -1;
2557         }
2558     }
2559 
2560     if (!migrate_pause_before_switchover()) {
2561         migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
2562                           MIGRATION_STATUS_POSTCOPY_ACTIVE);
2563     }
2564 
2565     trace_postcopy_start();
2566     qemu_mutex_lock_iothread();
2567     trace_postcopy_start_set_run();
2568 
2569     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
2570     global_state_store();
2571     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
2572     if (ret < 0) {
2573         goto fail;
2574     }
2575 
2576     ret = migration_maybe_pause(ms, &cur_state,
2577                                 MIGRATION_STATUS_POSTCOPY_ACTIVE);
2578     if (ret < 0) {
2579         goto fail;
2580     }
2581 
2582     ret = bdrv_inactivate_all();
2583     if (ret < 0) {
2584         goto fail;
2585     }
2586     restart_block = true;
2587 
2588     /*
2589      * Cause any non-postcopiable, but iterative devices to
2590      * send out their final data.
2591      */
2592     qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
2593 
2594     /*
2595      * In the "finish migrate" state, with the io-lock held, everything
2596      * should be quiet, but we've potentially still got dirty pages and
2597      * we need to tell the destination to throw away any pages it has
2598      * already received that are now dirty.
2599      */
2600     if (migrate_postcopy_ram()) {
2601         ram_postcopy_send_discard_bitmap(ms);
2602     }
2603 
2604     /*
2605      * Send the rest of the state - note that things that are doing
2606      * postcopy will notice we're in POSTCOPY_ACTIVE and not actually
2607      * wrap their state up here.
2608      */
2609     /* 0 max-postcopy-bandwidth means unlimited */
2610     if (!bandwidth) {
2611         qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
2612     } else {
2613         qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
2614     }
2615     if (migrate_postcopy_ram()) {
2616         /* Ping just for debugging, helps line traces up */
2617         qemu_savevm_send_ping(ms->to_dst_file, 2);
2618     }
2619 
2620     /*
2621      * While loading the device state we may trigger page transfer
2622      * requests and the fd must be free to process those, and thus
2623      * the destination must read the whole device state off the fd before
2624      * it starts processing it.  Unfortunately the ad-hoc migration format
2625      * doesn't allow the destination to know the size to read without fully
2626      * parsing it through each devices load-state code (especially the open
2627      * coded devices that use get/put).
2628      * So we wrap the device state up in a package with a length at the start;
2629      * to do this we use a qemu_buf to hold the whole of the device state.
2630      */
2631     bioc = qio_channel_buffer_new(4096);
2632     qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
2633     fb = qemu_file_new_output(QIO_CHANNEL(bioc));
2634     object_unref(OBJECT(bioc));
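
    /*
     * From here until the package is sent, writes to fb only accumulate
     * in bioc's memory buffer; qemu_savevm_send_packaged() below then
     * transmits the whole buffer as one length-prefixed blob on the real
     * migration channel.
     */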
2635 
2636     /*
2637      * Make sure the receiver can get incoming pages before we send the rest
2638      * of the state
2639      */
2640     qemu_savevm_send_postcopy_listen(fb);
2641 
2642     qemu_savevm_state_complete_precopy(fb, false, false);
2643     if (migrate_postcopy_ram()) {
2644         qemu_savevm_send_ping(fb, 3);
2645     }
2646 
2647     qemu_savevm_send_postcopy_run(fb);
2648 
2649     /* <><> end of stuff going into the package */
2650 
2651     /* Last point of recovery; as soon as we send the package the destination
2652      * can open devices and potentially start running.
2653      * Let's just check again that we've not got any errors.
2654      */
2655     ret = qemu_file_get_error(ms->to_dst_file);
2656     if (ret) {
2657         error_report("postcopy_start: Migration stream errored (pre package)");
2658         goto fail_closefb;
2659     }
2660 
2661     restart_block = false;
2662 
2663     /* Now send that blob */
2664     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
2665         goto fail_closefb;
2666     }
2667     qemu_fclose(fb);
2668 
2669     /* Send a notification to give a chance for anything that needs to
2670      * happen at the transition to postcopy and after the device state; in
2671      * particular spice needs to trigger a transition now.
2672      */
2673     ms->postcopy_after_devices = true;
2674     notifier_list_notify(&migration_state_notifiers, ms);
2675     ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
2676     ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
2677 
2678     qemu_mutex_unlock_iothread();
2679 
2680     if (migrate_postcopy_ram()) {
2681         /*
2682          * Although this ping is just for debug, it could potentially be
2683          * used for getting a better measurement of downtime at the source.
2684          */
2685         qemu_savevm_send_ping(ms->to_dst_file, 4);
2686     }
2687 
2688     if (migrate_release_ram()) {
2689         ram_postcopy_migrated_memory_release(ms);
2690     }
2691 
2692     ret = qemu_file_get_error(ms->to_dst_file);
2693     if (ret) {
2694         error_report("postcopy_start: Migration stream errored");
2695         migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2696                               MIGRATION_STATUS_FAILED);
2697     }
2698 
2699     trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
2700 
2701     return ret;
2702 
2703 fail_closefb:
2704     qemu_fclose(fb);
2705 fail:
2706     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2707                           MIGRATION_STATUS_FAILED);
2708     if (restart_block) {
2709         /* A failure happened early enough that we know the destination hasn't
2710          * accessed block devices, so we're safe to recover.
2711          */
2712         Error *local_err = NULL;
2713 
2714         bdrv_activate_all(&local_err);
2715         if (local_err) {
2716             error_report_err(local_err);
2717         }
2718     }
2719     qemu_mutex_unlock_iothread();
2720     return -1;
2721 }
2722 
2723 /**
2724  * migration_maybe_pause: Pause if required to by
2725  * migrate_pause_before_switchover.  Called with the iothread locked.
2726  * Returns: 0 on success
2727  */
2728 static int migration_maybe_pause(MigrationState *s,
2729                                  int *current_active_state,
2730                                  int new_state)
2731 {
2732     if (!migrate_pause_before_switchover()) {
2733         return 0;
2734     }
2735 
2736     /* Since leaving this state is not atomic with posting the semaphore
2737      * it's possible that someone could have issued multiple migrate_continue
2738      * and the semaphore is incorrectly positive at this point;
2739      * the docs say it's undefined to reinit a semaphore that's already
2740      * init'd, so use timedwait to eat up any existing posts.
2741      */
2742     while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
2743         /* This block intentionally left blank */
2744     }
2745 
2746     /*
2747      * If the migration is cancelled when it is in the completion phase,
2748      * the migration state is set to MIGRATION_STATUS_CANCELLING.
2749      * So we don't need to wait on the semaphore; otherwise we would always
2750      * wait for the 'pause_sem' semaphore.
2751      */
2752     if (s->state != MIGRATION_STATUS_CANCELLING) {
2753         qemu_mutex_unlock_iothread();
2754         migrate_set_state(&s->state, *current_active_state,
2755                           MIGRATION_STATUS_PRE_SWITCHOVER);
2756         qemu_sem_wait(&s->pause_sem);
2757         migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
2758                           new_state);
2759         *current_active_state = new_state;
2760         qemu_mutex_lock_iothread();
2761     }
2762 
2763     return s->state == new_state ? 0 : -EINVAL;
2764 }
2765 
2766 /**
2767  * migration_completion: Used by migration_thread when there's not much left.
2768  *   The caller 'breaks' the loop when this returns.
2769  *
2770  * @s: Current migration state
2771  */
2772 static void migration_completion(MigrationState *s)
2773 {
2774     int ret;
2775     int current_active_state = s->state;
2776 
2777     if (s->state == MIGRATION_STATUS_ACTIVE) {
2778         qemu_mutex_lock_iothread();
2779         s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2780         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
2781         s->vm_was_running = runstate_is_running();
2782         ret = global_state_store();
2783 
2784         if (!ret) {
2785             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
2786             trace_migration_completion_vm_stop(ret);
2787             if (ret >= 0) {
2788                 ret = migration_maybe_pause(s, &current_active_state,
2789                                             MIGRATION_STATUS_DEVICE);
2790             }
2791             if (ret >= 0) {
2792                 s->block_inactive = !migrate_colo();
2793                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
2794                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
2795                                                          s->block_inactive);
2796             }
2797         }
2798         qemu_mutex_unlock_iothread();
2799 
2800         if (ret < 0) {
2801             goto fail;
2802         }
2803     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2804         trace_migration_completion_postcopy_end();
2805 
2806         qemu_mutex_lock_iothread();
2807         qemu_savevm_state_complete_postcopy(s->to_dst_file);
2808         qemu_mutex_unlock_iothread();
2809 
2810         /*
2811          * Shut down the postcopy fast path thread.  This is only needed
2812          * when the dest QEMU binary is old (7.1/7.2).  QEMU 8.0+ doesn't need
2813          * this.
2814          */
2815         if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
2816             postcopy_preempt_shutdown_file(s);
2817         }
2818 
2819         trace_migration_completion_postcopy_end_after_complete();
2820     } else {
2821         goto fail;
2822     }
2823 
2824     /*
2825      * If rp was opened we must clean up the thread before
2826      * cleaning everything else up (since if there are no failures
2827      * it will wait for the destination to send its status in
2828      * a SHUT command).
2829      */
2830     if (s->rp_state.rp_thread_created) {
2831         int rp_error;
2832         trace_migration_return_path_end_before();
2833         rp_error = await_return_path_close_on_source(s);
2834         trace_migration_return_path_end_after(rp_error);
2835         if (rp_error) {
2836             goto fail_invalidate;
2837         }
2838     }
2839 
2840     if (qemu_file_get_error(s->to_dst_file)) {
2841         trace_migration_completion_file_err();
2842         goto fail_invalidate;
2843     }
2844 
2845     if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
2846         /* COLO does not support postcopy */
2847         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
2848                           MIGRATION_STATUS_COLO);
2849     } else {
2850         migrate_set_state(&s->state, current_active_state,
2851                           MIGRATION_STATUS_COMPLETED);
2852     }
2853 
2854     return;
2855 
2856 fail_invalidate:
2857     /* If not doing postcopy, vm_start() will be called: let's regain
2858      * control of the images.
2859      */
2860     if (s->state == MIGRATION_STATUS_ACTIVE ||
2861         s->state == MIGRATION_STATUS_DEVICE) {
2862         Error *local_err = NULL;
2863 
2864         qemu_mutex_lock_iothread();
2865         bdrv_activate_all(&local_err);
2866         if (local_err) {
2867             error_report_err(local_err);
2868             s->block_inactive = true;
2869         } else {
2870             s->block_inactive = false;
2871         }
2872         qemu_mutex_unlock_iothread();
2873     }
2874 
2875 fail:
2876     migrate_set_state(&s->state, current_active_state,
2877                       MIGRATION_STATUS_FAILED);
2878 }
2879 
2880 /**
2881  * bg_migration_completion: Used by bg_migration_thread when after all the
2882  *   RAM has been saved. The caller 'breaks' the loop when this returns.
2883  *
2884  * @s: Current migration state
2885  */
2886 static void bg_migration_completion(MigrationState *s)
2887 {
2888     int current_active_state = s->state;
2889 
2890     /*
2891      * Stop tracking RAM writes - un-protect memory, un-register UFFD
2892      * memory ranges, flush kernel wait queues and wake up threads
2893      * waiting for write faults to be resolved.
2894      */
2895     ram_write_tracking_stop();
2896 
2897     if (s->state == MIGRATION_STATUS_ACTIVE) {
2898         /*
2899          * By this moment we have RAM content saved into the migration stream.
2900          * The next step is to flush the non-RAM content (device state)
2901          * right after the ram content. The device state has been stored into
2902          * the temporary buffer before RAM saving started.
2903          */
2904         qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
2905         qemu_fflush(s->to_dst_file);
2906     } else if (s->state == MIGRATION_STATUS_CANCELLING) {
2907         goto fail;
2908     }
2909 
2910     if (qemu_file_get_error(s->to_dst_file)) {
2911         trace_migration_completion_file_err();
2912         goto fail;
2913     }
2914 
2915     migrate_set_state(&s->state, current_active_state,
2916                       MIGRATION_STATUS_COMPLETED);
2917     return;
2918 
2919 fail:
2920     migrate_set_state(&s->state, current_active_state,
2921                       MIGRATION_STATUS_FAILED);
2922 }
2923 
2924 typedef enum MigThrError {
2925     /* No error detected */
2926     MIG_THR_ERR_NONE = 0,
2927     /* Detected error, but resumed successfully */
2928     MIG_THR_ERR_RECOVERED = 1,
2929     /* Detected fatal error, need to exit */
2930     MIG_THR_ERR_FATAL = 2,
2931 } MigThrError;
2932 
2933 static int postcopy_resume_handshake(MigrationState *s)
2934 {
2935     qemu_savevm_send_postcopy_resume(s->to_dst_file);
2936 
2937     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2938         qemu_sem_wait(&s->rp_state.rp_sem);
2939     }
2940 
2941     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2942         return 0;
2943     }
2944 
2945     return -1;
2946 }
2947 
2948 /* Return zero on success, or <0 on error */
2949 static int postcopy_do_resume(MigrationState *s)
2950 {
2951     int ret;
2952 
2953     /*
2954      * Call all the resume_prepare() hooks, so that modules can be
2955      * ready for the migration resume.
2956      */
2957     ret = qemu_savevm_state_resume_prepare(s);
2958     if (ret) {
2959         error_report("%s: resume_prepare() failure detected: %d",
2960                      __func__, ret);
2961         return ret;
2962     }
2963 
2964     /*
2965      * If preempt is enabled, re-establish the preempt channel.  Note that
2966      * we do it after resume prepare to make sure the main channel will be
2967      * created before the preempt channel.  E.g. with a weak network, the
2968      * dest QEMU may otherwise get confused about the order in which the
2969      * preempt and main channels were set up.  This guarantees the correct order.
2970      */
2971     ret = postcopy_preempt_establish_channel(s);
2972     if (ret) {
2973         error_report("%s: postcopy_preempt_establish_channel(): %d",
2974                      __func__, ret);
2975         return ret;
2976     }
2977 
2978     /*
2979      * Last handshake with destination on the resume (destination will
2980      * switch to postcopy-active afterwards)
2981      */
2982     ret = postcopy_resume_handshake(s);
2983     if (ret) {
2984         error_report("%s: handshake failed: %d", __func__, ret);
2985         return ret;
2986     }
2987 
2988     return 0;
2989 }
2990 
2991 /*
2992  * We don't return until we are in a safe state to continue the current
2993  * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
2994  * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
2995  */
2996 static MigThrError postcopy_pause(MigrationState *s)
2997 {
2998     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
2999 
3000     while (true) {
3001         QEMUFile *file;
3002 
3003         /*
3004          * Current channel is possibly broken. Release it.  Note that this is
3005          * guaranteed even without lock because to_dst_file should only be
3006          * modified by the migration thread.  That also guarantees that the
3007          * unregister of yank is safe too without the lock.  It should be safe
3008          * even to be within the qemu_file_lock, but we didn't do that to avoid
3009          * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
3010          * the qemu_file_lock critical section as small as possible.
3011          */
3012         assert(s->to_dst_file);
3013         migration_ioc_unregister_yank_from_file(s->to_dst_file);
3014         qemu_mutex_lock(&s->qemu_file_lock);
3015         file = s->to_dst_file;
3016         s->to_dst_file = NULL;
3017         qemu_mutex_unlock(&s->qemu_file_lock);
3018 
3019         qemu_file_shutdown(file);
3020         qemu_fclose(file);
3021 
3022         migrate_set_state(&s->state, s->state,
3023                           MIGRATION_STATUS_POSTCOPY_PAUSED);
3024 
3025         error_report("Detected IO failure for postcopy. "
3026                      "Migration paused.");
3027 
3028         /*
3029          * We wait until things are fixed up. Then someone will set the
3030          * status back for us.
3031          */
3032         while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
3033             qemu_sem_wait(&s->postcopy_pause_sem);
3034         }
3035 
3036         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3037             /* Woken up by a recover procedure. Give it a shot */
3038 
3039             /*
3040              * Firstly, let's wake up the return path now, with a new
3041              * return path channel.
3042              */
3043             qemu_sem_post(&s->postcopy_pause_rp_sem);
3044 
3045             /* Do the resume logic */
3046             if (postcopy_do_resume(s) == 0) {
3047                 /* Let's continue! */
3048                 trace_postcopy_pause_continued();
3049                 return MIG_THR_ERR_RECOVERED;
3050             } else {
3051                 /*
3052                  * Something went wrong during the recovery; let's
3053                  * pause again. Pause is always better than throwing
3054                  * data away.
3055                  */
3056                 continue;
3057             }
3058         } else {
3059             /* This is not right... Time to quit. */
3060             return MIG_THR_ERR_FATAL;
3061         }
3062     }
3063 }
3064 
3065 static MigThrError migration_detect_error(MigrationState *s)
3066 {
3067     int ret;
3068     int state = s->state;
3069     Error *local_error = NULL;
3070 
3071     if (state == MIGRATION_STATUS_CANCELLING ||
3072         state == MIGRATION_STATUS_CANCELLED) {
3073         /* End the migration, but don't set the state to failed */
3074         return MIG_THR_ERR_FATAL;
3075     }
3076 
3077     /*
3078      * Try to detect any file errors.  Note that postcopy_qemufile_src will
3079      * be NULL when postcopy preempt is not enabled.
3080      */
3081     ret = qemu_file_get_error_obj_any(s->to_dst_file,
3082                                       s->postcopy_qemufile_src,
3083                                       &local_error);
3084     if (!ret) {
3085         /* Everything is fine */
3086         assert(!local_error);
3087         return MIG_THR_ERR_NONE;
3088     }
3089 
3090     if (local_error) {
3091         migrate_set_error(s, local_error);
3092         error_free(local_error);
3093     }
3094 
3095     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
3096         /*
3097          * For postcopy, we allow the network to be down for a
3098          * while. After that, it can be continued by a
3099          * recovery phase.
3100          */
3101         return postcopy_pause(s);
3102     } else {
3103         /*
3104          * For precopy (or postcopy with an error outside IO), we fail
3105          * immediately.
3106          */
3107         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
3108         trace_migration_thread_file_err();
3109 
3110         /* Time to stop the migration, now. */
3111         return MIG_THR_ERR_FATAL;
3112     }
3113 }
3114 
3115 /* How many bytes have we transferred since the beginning of the migration */
3116 static uint64_t migration_total_bytes(MigrationState *s)
3117 {
3118     return qemu_file_total_transferred(s->to_dst_file) +
3119         stat64_get(&ram_counters.multifd_bytes);
3120 }
3121 
3122 static void migration_calculate_complete(MigrationState *s)
3123 {
3124     uint64_t bytes = migration_total_bytes(s);
3125     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3126     int64_t transfer_time;
3127 
3128     s->total_time = end_time - s->start_time;
3129     if (!s->downtime) {
3130         /*
3131          * It's still not set, so we are precopy migration.  For
3132          * postcopy, downtime is calculated during postcopy_start().
3133          */
3134         s->downtime = end_time - s->downtime_start;
3135     }
3136 
3137     transfer_time = s->total_time - s->setup_time;
3138     if (transfer_time) {
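        /*
         * transfer_time is in milliseconds, so bytes * 8 / transfer_time
         * is bits per millisecond, i.e. kbits/sec; the final division by
         * 1000 yields Mbits/sec.
         */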
3139         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3140     }
3141 }
3142 
3143 static void update_iteration_initial_status(MigrationState *s)
3144 {
3145     /*
3146      * Update these three fields at the same time to avoid mismatched info
3147      * leading to wrong speed calculations.
3148      */
3149     s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3150     s->iteration_initial_bytes = migration_total_bytes(s);
3151     s->iteration_initial_pages = ram_get_total_transferred_pages();
3152 }
3153 
3154 static void migration_update_counters(MigrationState *s,
3155                                       int64_t current_time)
3156 {
3157     uint64_t transferred, transferred_pages, time_spent;
3158     uint64_t current_bytes; /* bytes transferred since the beginning */
3159     double bandwidth;
3160 
3161     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3162         return;
3163     }
3164 
3165     current_bytes = migration_total_bytes(s);
3166     transferred = current_bytes - s->iteration_initial_bytes;
3167     time_spent = current_time - s->iteration_start_time;
3168     bandwidth = (double)transferred / time_spent;
3169     s->threshold_size = bandwidth * s->parameters.downtime_limit;
3170 
3171     s->mbps = (((double) transferred * 8.0) /
3172                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3173 
3174     transferred_pages = ram_get_total_transferred_pages() -
3175                             s->iteration_initial_pages;
3176     s->pages_per_second = (double) transferred_pages /
3177                              (((double) time_spent / 1000.0));
3178 
3179     /*
3180      * If we haven't sent anything, we don't want to recalculate;
3181      * 10000 bytes is a small enough threshold for our purposes.
3182      */
3183     if (ram_counters.dirty_pages_rate && transferred > 10000) {
3184         s->expected_downtime = ram_counters.remaining / bandwidth;
3185     }
3186 
3187     qemu_file_reset_rate_limit(s->to_dst_file);
3188 
3189     update_iteration_initial_status(s);
3190 
3191     trace_migrate_transferred(transferred, time_spent,
3192                               bandwidth, s->threshold_size);
3193 }
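
/*
 * Illustrative example of the threshold_size computation above (numbers
 * are assumptions, not from the source): if 100 MiB were transferred in a
 * 1000 ms window, bandwidth = 104857600 / 1000 = 104857.6 bytes/ms; with
 * downtime_limit = 300 ms this gives threshold_size = 31457280 bytes,
 * i.e. the amount we could still send within the allowed downtime.
 */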
3194 
3195 /* Migration thread iteration status */
3196 typedef enum {
3197     MIG_ITERATE_RESUME,         /* Resume current iteration */
3198     MIG_ITERATE_SKIP,           /* Skip current iteration */
3199     MIG_ITERATE_BREAK,          /* Break the loop */
3200 } MigIterateState;
3201 
3202 /*
3203  * Return the iteration status: MIG_ITERATE_RESUME to continue, MIG_ITERATE_SKIP
3204  * to skip the rest of this iteration, or MIG_ITERATE_BREAK to end the loop.
3205  */
3206 static MigIterateState migration_iteration_run(MigrationState *s)
3207 {
3208     uint64_t must_precopy, can_postcopy;
3209     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3210 
3211     qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
3212     uint64_t pending_size = must_precopy + can_postcopy;
3213 
3214     trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
3215 
3216     if (must_precopy <= s->threshold_size) {
3217         qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
3218         pending_size = must_precopy + can_postcopy;
3219         trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
3220     }
3221 
3222     if (!pending_size || pending_size < s->threshold_size) {
3223         trace_migration_thread_low_pending(pending_size);
3224         migration_completion(s);
3225         return MIG_ITERATE_BREAK;
3226     }
3227 
3228     /* Still a significant amount to transfer */
3229     if (!in_postcopy && must_precopy <= s->threshold_size &&
3230         qatomic_read(&s->start_postcopy)) {
3231         if (postcopy_start(s)) {
3232             error_report("%s: postcopy failed to start", __func__);
3233         }
3234         return MIG_ITERATE_SKIP;
3235     }
3236 
3237     /* Just another iteration step */
3238     qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
3239     return MIG_ITERATE_RESUME;
3240 }
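
/*
 * To summarize the decisions above: completion runs once the total
 * pending data drops below threshold_size; postcopy is started when the
 * must-precopy part alone fits under the threshold and the user has
 * requested it; otherwise another savevm state iteration runs.
 */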
3241 
3242 static void migration_iteration_finish(MigrationState *s)
3243 {
3244     /* If we enabled cpu throttling for auto-converge, turn it off. */
3245     cpu_throttle_stop();
3246 
3247     qemu_mutex_lock_iothread();
3248     switch (s->state) {
3249     case MIGRATION_STATUS_COMPLETED:
3250         migration_calculate_complete(s);
3251         runstate_set(RUN_STATE_POSTMIGRATE);
3252         break;
3253     case MIGRATION_STATUS_COLO:
3254         if (!migrate_colo()) {
3255             error_report("%s: critical error: calling COLO code without "
3256                          "COLO enabled", __func__);
3257         }
3258         migrate_start_colo_process(s);
3259         s->vm_was_running = true;
3260         /* Fallthrough */
3261     case MIGRATION_STATUS_FAILED:
3262     case MIGRATION_STATUS_CANCELLED:
3263     case MIGRATION_STATUS_CANCELLING:
3264         if (s->vm_was_running) {
3265             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
3266                 vm_start();
3267             }
3268         } else {
3269             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3270                 runstate_set(RUN_STATE_POSTMIGRATE);
3271             }
3272         }
3273         break;
3274 
3275     default:
3276         /* Should not reach here, but if so, forgive the VM. */
3277         error_report("%s: Unknown ending state %d", __func__, s->state);
3278         break;
3279     }
3280     migrate_fd_cleanup_schedule(s);
3281     qemu_mutex_unlock_iothread();
3282 }
3283 
3284 static void bg_migration_iteration_finish(MigrationState *s)
3285 {
3286     qemu_mutex_lock_iothread();
3287     switch (s->state) {
3288     case MIGRATION_STATUS_COMPLETED:
3289         migration_calculate_complete(s);
3290         break;
3291 
3292     case MIGRATION_STATUS_ACTIVE:
3293     case MIGRATION_STATUS_FAILED:
3294     case MIGRATION_STATUS_CANCELLED:
3295     case MIGRATION_STATUS_CANCELLING:
3296         break;
3297 
3298     default:
3299         /* Should not reach here, but if so, forgive the VM. */
3300         error_report("%s: Unknown ending state %d", __func__, s->state);
3301         break;
3302     }
3303 
3304     migrate_fd_cleanup_schedule(s);
3305     qemu_mutex_unlock_iothread();
3306 }
3307 
3308 /*
3309  * Return MIG_ITERATE_RESUME to continue to the next iteration directly,
3310  * or MIG_ITERATE_BREAK once completion has been triggered.
3311  */
3312 static MigIterateState bg_migration_iteration_run(MigrationState *s)
3313 {
3314     int res;
3315 
3316     res = qemu_savevm_state_iterate(s->to_dst_file, false);
3317     if (res > 0) {
3318         bg_migration_completion(s);
3319         return MIG_ITERATE_BREAK;
3320     }
3321 
3322     return MIG_ITERATE_RESUME;
3323 }
3324 
3325 void migration_make_urgent_request(void)
3326 {
3327     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3328 }
3329 
3330 void migration_consume_urgent_request(void)
3331 {
3332     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3333 }
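
/*
 * The two helpers above form a handshake on rate_limit_sem: a producer
 * calls migration_make_urgent_request() to break the rate-limit wait in
 * migration_rate_limit() below, and the service routine then calls
 * migration_consume_urgent_request() once per item it handles, balancing
 * the compensating post done when the timedwait succeeds.
 */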
3334 
3335 /* Returns true if the rate limiting was broken by an urgent request */
3336 bool migration_rate_limit(void)
3337 {
3338     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3339     MigrationState *s = migrate_get_current();
3340 
3341     bool urgent = false;
3342     migration_update_counters(s, now);
3343     if (qemu_file_rate_limit(s->to_dst_file)) {
3344 
3345         if (qemu_file_get_error(s->to_dst_file)) {
3346             return false;
3347         }
3348         /*
3349          * Wait for the rate-limiting delay to expire, OR for
3350          * something urgent to post the semaphore.
3351          */
3352         int ms = s->iteration_start_time + BUFFER_DELAY - now;
3353         trace_migration_rate_limit_pre(ms);
3354         if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
3355             /*
3356              * We were woken by one or more urgent things, but
3357              * the timedwait will have consumed one of them.
3358              * The service routine for the urgent wake decrements
3359              * the semaphore itself for each item it consumes,
3360              * so post back the one we just consumed.
3361              */
3362             qemu_sem_post(&s->rate_limit_sem);
3363             urgent = true;
3364         }
3365         trace_migration_rate_limit_post(urgent);
3366     }
3367     return urgent;
3368 }
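
/*
 * Timing sketch for the wait above (illustrative numbers): assuming
 * BUFFER_DELAY is 100 ms and the iteration window started 40 ms ago,
 * ms = start + 100 - now = 60, so we block for at most 60 ms unless an
 * urgent request posts rate_limit_sem first.
 */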
3369 
3370 /*
3371  * If failover devices are present, wait until they are completely
3372  * unplugged.
3373  */
3374 
3375 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
3376                                     int new_state)
3377 {
3378     if (qemu_savevm_state_guest_unplug_pending()) {
3379         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
3380 
3381         while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
3382                qemu_savevm_state_guest_unplug_pending()) {
3383             qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3384         }
3385         if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
3386             int timeout = 120; /* 30 seconds */
3387             /*
3388              * Migration has been cancelled, but since we have started
3389              * an unplug we must wait for it to finish so that the card
3390              * can be plugged back in.
3391              */
3392             while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
3393                 qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3394             }
3395             if (qemu_savevm_state_guest_unplug_pending() &&
3396                 !qtest_enabled()) {
3397                 warn_report("migration: partially unplugged device on "
3398                             "failure");
3399             }
3400         }
3401 
3402         migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
3403     } else {
3404         migrate_set_state(&s->state, old_state, new_state);
3405     }
3406 }
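
/*
 * Note on the timing above: each qemu_sem_timedwait() waits up to 250 ms,
 * so the cancellation path's budget of 120 retries amounts to roughly the
 * 30 seconds noted next to the timeout variable.
 */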
3407 
3408 /*
3409  * Master migration thread on the source VM.
3410  * It drives the migration and pumps the data down the outgoing channel.
3411  */
3412 static void *migration_thread(void *opaque)
3413 {
3414     MigrationState *s = opaque;
3415     MigrationThread *thread = NULL;
3416     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3417     MigThrError thr_error;
3418     bool urgent = false;
3419 
3420     thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());
3421 
3422     rcu_register_thread();
3423 
3424     object_ref(OBJECT(s));
3425     update_iteration_initial_status(s);
3426 
3427     qemu_savevm_state_header(s->to_dst_file);
3428 
3429     /*
3430      * If we opened the return path, we need to make sure dst has it
3431      * opened as well.
3432      */
3433     if (s->rp_state.rp_thread_created) {
3434         /* Now tell the dest that it should open its end so it can reply */
3435         qemu_savevm_send_open_return_path(s->to_dst_file);
3436 
3437         /* And do a ping that will make stuff easier to debug */
3438         qemu_savevm_send_ping(s->to_dst_file, 1);
3439     }
3440 
3441     if (migrate_postcopy()) {
3442         /*
3443          * Tell the destination that we *might* want to do postcopy later;
3444          * if the other end can't do postcopy it should fail now, nice and
3445          * early.
3446          */
3447         qemu_savevm_send_postcopy_advise(s->to_dst_file);
3448     }
3449 
3450     if (migrate_colo()) {
3451         /* Notify migration destination that we enable COLO */
3452         qemu_savevm_send_colo_enable(s->to_dst_file);
3453     }
3454 
3455     qemu_savevm_state_setup(s->to_dst_file);
3456 
3457     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
3458                                MIGRATION_STATUS_ACTIVE);
3459 
3460     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3461 
3462     trace_migration_thread_setup_complete();
3463 
3464     while (migration_is_active(s)) {
3465         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
3466             MigIterateState iter_state = migration_iteration_run(s);
3467             if (iter_state == MIG_ITERATE_SKIP) {
3468                 continue;
3469             } else if (iter_state == MIG_ITERATE_BREAK) {
3470                 break;
3471             }
3472         }
3473 
3474         /*
3475          * Try to detect any kind of failures, and see whether we
3476          * should stop the migration now.
3477          */
3478         thr_error = migration_detect_error(s);
3479         if (thr_error == MIG_THR_ERR_FATAL) {
3480             /* Stop migration */
3481             break;
3482         } else if (thr_error == MIG_THR_ERR_RECOVERED) {
3483             /*
3484              * Just recovered from e.g. a network failure; reset all
3485              * the local variables. This is important to avoid
3486              * breaking the transferred-bytes and bandwidth calculations.
3487              */
3488             update_iteration_initial_status(s);
3489         }
3490 
3491         urgent = migration_rate_limit();
3492     }
3493 
3494     trace_migration_thread_after_loop();
3495     migration_iteration_finish(s);
3496     object_unref(OBJECT(s));
3497     rcu_unregister_thread();
3498     MigrationThreadDel(thread);
3499     return NULL;
3500 }
3501 
3502 static void bg_migration_vm_start_bh(void *opaque)
3503 {
3504     MigrationState *s = opaque;
3505 
3506     qemu_bh_delete(s->vm_start_bh);
3507     s->vm_start_bh = NULL;
3508 
3509     vm_start();
3510     s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
3511 }
3512 
3513 /**
3514  * Background snapshot thread, based on live migration code.
3515  * This is an alternative implementation of live migration mechanism
3516  * introduced specifically to support background snapshots.
3517  *
3518  * It takes advantage of the userfault_fd write-protection mechanism
3519  * introduced in the v5.7 kernel. Compared to the existing dirty-page-logging
3520  * migration it produces much less stream traffic, resulting in smaller
3521  * snapshot images, simply because no duplicate pages can get into the stream.
3522  *
3523  * Another key point is that the generated vmstate stream reflects the machine
3524  * state 'frozen' at the beginning of snapshot creation, whereas with the
3525  * dirty-page-logging mechanism the saved snapshot is effectively the state of
3526  * the VM at the end of the process.
3527  */
3528 static void *bg_migration_thread(void *opaque)
3529 {
3530     MigrationState *s = opaque;
3531     int64_t setup_start;
3532     MigThrError thr_error;
3533     QEMUFile *fb;
3534     bool early_fail = true;
3535 
3536     rcu_register_thread();
3537     object_ref(OBJECT(s));
3538 
3539     qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
3540 
3541     setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3542     /*
3543      * We want to save the vmstate from the moment migration was initiated,
3544      * but we also want to save RAM content while the VM is running.
3545      * The RAM content should appear first in the vmstate. So, we first
3546      * stash the non-RAM part of the vmstate to the temporary buffer,
3547      * then write RAM part of the vmstate to the migration stream
3548      * with vCPUs running and, finally, write stashed non-RAM part of
3549      * the vmstate from the buffer to the migration stream.
3550      */
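    /*
     * Rough sketch of the resulting stream layout (illustrative only):
     *   [ savevm header | RAM written live | buffered non-RAM vmstate ]
     */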
3551     s->bioc = qio_channel_buffer_new(512 * 1024);
3552     qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
3553     fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
3554     object_unref(OBJECT(s->bioc));
3555 
3556     update_iteration_initial_status(s);
3557 
3558     /*
3559      * Prepare for tracking memory writes with UFFD-WP - populate
3560      * RAM pages before protecting.
3561      */
3562 #ifdef __linux__
3563     ram_write_tracking_prepare();
3564 #endif
3565 
3566     qemu_savevm_state_header(s->to_dst_file);
3567     qemu_savevm_state_setup(s->to_dst_file);
3568 
3569     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
3570                                MIGRATION_STATUS_ACTIVE);
3571 
3572     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3573 
3574     trace_migration_thread_setup_complete();
3575     s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3576 
3577     qemu_mutex_lock_iothread();
3578 
3579     /*
3580      * If the VM is currently suspended, we need to wake it up so that
3581      * vm_stop_force_state() can make a valid runstate transition.
3582      */
3583     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
3584     s->vm_was_running = runstate_is_running();
3585 
3586     if (global_state_store()) {
3587         goto fail;
3588     }
3589     /* Forcibly stop VM before saving state of vCPUs and devices */
3590     if (vm_stop_force_state(RUN_STATE_PAUSED)) {
3591         goto fail;
3592     }
3593     /*
3594      * Put vCPUs in sync with shadow context structures, then
3595      * save their state to channel-buffer along with devices.
3596      */
3597     cpu_synchronize_all_states();
3598     if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
3599         goto fail;
3600     }
3601     /*
3602      * Since we are going to get non-iterable state data directly
3603      * from s->bioc->data, explicit flush is needed here.
3604      */
3605     qemu_fflush(fb);
3606 
3607     /* Now initialize UFFD context and start tracking RAM writes */
3608     if (ram_write_tracking_start()) {
3609         goto fail;
3610     }
3611     early_fail = false;
3612 
3613     /*
3614      * Start the VM from a BH handler to avoid a write-fault lock here.
3615      * UFFD-WP protection for the whole of RAM is already enabled, so
3616      * calling the VM state change notifiers from vm_start() would initiate
3617      * writes to virtio VQ memory, which is in the write-protected region.
3618      */
3619     s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
3620     qemu_bh_schedule(s->vm_start_bh);
3621 
3622     qemu_mutex_unlock_iothread();
3623 
3624     while (migration_is_active(s)) {
3625         MigIterateState iter_state = bg_migration_iteration_run(s);
3626         if (iter_state == MIG_ITERATE_SKIP) {
3627             continue;
3628         } else if (iter_state == MIG_ITERATE_BREAK) {
3629             break;
3630         }
3631 
3632         /*
3633          * Try to detect any kind of failures, and see whether we
3634          * should stop the migration now.
3635          */
3636         thr_error = migration_detect_error(s);
3637         if (thr_error == MIG_THR_ERR_FATAL) {
3638             /* Stop migration */
3639             break;
3640         }
3641 
3642         migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
3643     }
3644 
3645     trace_migration_thread_after_loop();
3646 
3647 fail:
3648     if (early_fail) {
3649         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3650                 MIGRATION_STATUS_FAILED);
3651         qemu_mutex_unlock_iothread();
3652     }
3653 
3654     bg_migration_iteration_finish(s);
3655 
3656     qemu_fclose(fb);
3657     object_unref(OBJECT(s));
3658     rcu_unregister_thread();
3659 
3660     return NULL;
3661 }
3662 
3663 void migrate_fd_connect(MigrationState *s, Error *error_in)
3664 {
3665     Error *local_err = NULL;
3666     int64_t rate_limit;
3667     bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
3668 
3669     /*
3670      * If there's a previous error, free it and prepare for another one.
3671      * Meanwhile, if the migration completes successfully, no stale error
3672      * will be dumped when migrate_fd_cleanup() is called.
3673      */
3674     migrate_error_free(s);
3675 
3676     s->expected_downtime = s->parameters.downtime_limit;
3677     if (resume) {
3678         assert(s->cleanup_bh);
3679     } else {
3680         assert(!s->cleanup_bh);
3681         s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
3682     }
3683     if (error_in) {
3684         migrate_fd_error(s, error_in);
3685         if (resume) {
3686             /*
3687              * Don't do cleanup for resume if the channel is invalid; only dump
3688              * the error.  We wait for the user to connect another channel.
3689              * The error_report still gives the HMP user a hint on what failed.
3690              * It's normally done in migrate_fd_cleanup(), but call it here
3691              * explicitly.
3692              */
3693             error_report_err(error_copy(s->error));
3694         } else {
3695             migrate_fd_cleanup(s);
3696         }
3697         return;
3698     }
3699 
3700     if (resume) {
3701         /* This is a resumed migration */
3702         rate_limit = migrate_max_postcopy_bandwidth() /
3703             XFER_LIMIT_RATIO;
3704     } else {
3705         /* This is a fresh migration */
3706         rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO;
3707 
3708         /* Notify before starting migration thread */
3709         notifier_list_notify(&migration_state_notifiers, s);
3710     }
3711 
3712     qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
3713     qemu_file_set_blocking(s->to_dst_file, true);
3714 
3715     /*
3716      * Open the return path. For postcopy, it is used exclusively. For
3717      * precopy, QEMU uses the return path only if the user enabled the
3718      * "return-path" capability.
3719      */
3720     if (migrate_postcopy_ram() || migrate_return_path()) {
3721         if (open_return_path_on_source(s, !resume)) {
3722             error_report("Unable to open return-path for postcopy");
3723             migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
3724             migrate_fd_cleanup(s);
3725             return;
3726         }
3727     }
3728 
3729     /*
3730      * This needs to be done before resuming a postcopy.  Note: for newer
3731      * QEMUs we will delay the channel creation until postcopy_start(), to
3732      * avoid channels being created out of order.
3733      */
3734     if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
3735         postcopy_preempt_setup(s);
3736     }
3737 
3738     if (resume) {
3739         /* Wake up the main migration thread to do the recovery */
3740         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
3741                           MIGRATION_STATUS_POSTCOPY_RECOVER);
3742         qemu_sem_post(&s->postcopy_pause_sem);
3743         return;
3744     }
3745 
3746     if (multifd_save_setup(&local_err) != 0) {
3747         error_report_err(local_err);
3748         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
3749                           MIGRATION_STATUS_FAILED);
3750         migrate_fd_cleanup(s);
3751         return;
3752     }
3753 
3754     if (migrate_background_snapshot()) {
3755         qemu_thread_create(&s->thread, "bg_snapshot",
3756                 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
3757     } else {
3758         qemu_thread_create(&s->thread, "live_migration",
3759                 migration_thread, s, QEMU_THREAD_JOINABLE);
3760     }
3761     s->migration_thread_running = true;
3762 }
3763 
3764 #define DEFINE_PROP_MIG_CAP(name, x)             \
3765     DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false)
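
/*
 * For example (illustrative), DEFINE_PROP_MIG_CAP("x-colo",
 * MIGRATION_CAPABILITY_X_COLO) defines a boolean qdev property backed by
 * capabilities[MIGRATION_CAPABILITY_X_COLO], defaulting to false.
 */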
3766 
3767 static Property migration_properties[] = {
3768     DEFINE_PROP_BOOL("store-global-state", MigrationState,
3769                      store_global_state, true),
3770     DEFINE_PROP_BOOL("send-configuration", MigrationState,
3771                      send_configuration, true),
3772     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
3773                      send_section_footer, true),
3774     DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
3775                       decompress_error_check, true),
3776     DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
3777                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
3778     DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
3779                      preempt_pre_7_2, false),
3780 
3781     /* Migration parameters */
3782     DEFINE_PROP_UINT8("x-compress-level", MigrationState,
3783                       parameters.compress_level,
3784                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
3785     DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
3786                       parameters.compress_threads,
3787                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
3788     DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
3789                       parameters.compress_wait_thread, true),
3790     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
3791                       parameters.decompress_threads,
3792                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
3793     DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
3794                       parameters.throttle_trigger_threshold,
3795                       DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
3796     DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
3797                       parameters.cpu_throttle_initial,
3798                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
3799     DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
3800                       parameters.cpu_throttle_increment,
3801                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
3802     DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
3803                       parameters.cpu_throttle_tailslow, false),
3804     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
3805                       parameters.max_bandwidth, MAX_THROTTLE),
3806     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
3807                       parameters.downtime_limit,
3808                       DEFAULT_MIGRATE_SET_DOWNTIME),
3809     DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
3810                       parameters.x_checkpoint_delay,
3811                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
3812     DEFINE_PROP_UINT8("multifd-channels", MigrationState,
3813                       parameters.multifd_channels,
3814                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
3815     DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
3816                       parameters.multifd_compression,
3817                       DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
3818     DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
3819                       parameters.multifd_zlib_level,
3820                       DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
3821     DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
3822                       parameters.multifd_zstd_level,
3823                       DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
3824     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
3825                       parameters.xbzrle_cache_size,
3826                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
3827     DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
3828                       parameters.max_postcopy_bandwidth,
3829                       DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
3830     DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
3831                       parameters.max_cpu_throttle,
3832                       DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
3833     DEFINE_PROP_SIZE("announce-initial", MigrationState,
3834                       parameters.announce_initial,
3835                       DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
3836     DEFINE_PROP_SIZE("announce-max", MigrationState,
3837                       parameters.announce_max,
3838                       DEFAULT_MIGRATE_ANNOUNCE_MAX),
3839     DEFINE_PROP_SIZE("announce-rounds", MigrationState,
3840                       parameters.announce_rounds,
3841                       DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
3842     DEFINE_PROP_SIZE("announce-step", MigrationState,
3843                       parameters.announce_step,
3844                       DEFAULT_MIGRATE_ANNOUNCE_STEP),
3845     DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
3846     DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
3847     DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
3848 
3849     /* Migration capabilities */
3850     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
3851     DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
3852     DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
3853     DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
3854     DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
3855     DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
3856     DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
3857     DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
3858                         MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
3859     DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
3860     DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
3861     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
3862     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
3863     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
3864     DEFINE_PROP_MIG_CAP("x-background-snapshot",
3865             MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
3866 #ifdef CONFIG_LINUX
3867     DEFINE_PROP_MIG_CAP("x-zero-copy-send",
3868             MIGRATION_CAPABILITY_ZERO_COPY_SEND),
3869 #endif
3870 
3871     DEFINE_PROP_END_OF_LIST(),
3872 };
3873 
3874 static void migration_class_init(ObjectClass *klass, void *data)
3875 {
3876     DeviceClass *dc = DEVICE_CLASS(klass);
3877 
3878     dc->user_creatable = false;
3879     device_class_set_props(dc, migration_properties);
3880 }
3881 
3882 static void migration_instance_finalize(Object *obj)
3883 {
3884     MigrationState *ms = MIGRATION_OBJ(obj);
3885 
3886     qemu_mutex_destroy(&ms->error_mutex);
3887     qemu_mutex_destroy(&ms->qemu_file_lock);
3888     qemu_sem_destroy(&ms->wait_unplug_sem);
3889     qemu_sem_destroy(&ms->rate_limit_sem);
3890     qemu_sem_destroy(&ms->pause_sem);
3891     qemu_sem_destroy(&ms->postcopy_pause_sem);
3892     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
3893     qemu_sem_destroy(&ms->rp_state.rp_sem);
3894     qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
3895     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
3896     error_free(ms->error);
3897 }
3898 
3899 static void migration_instance_init(Object *obj)
3900 {
3901     MigrationState *ms = MIGRATION_OBJ(obj);
3902     MigrationParameters *params = &ms->parameters;
3903 
3904     ms->state = MIGRATION_STATUS_NONE;
3905     ms->mbps = -1;
3906     ms->pages_per_second = -1;
3907     qemu_sem_init(&ms->pause_sem, 0);
3908     qemu_mutex_init(&ms->error_mutex);
3909 
3910     params->tls_hostname = g_strdup("");
3911     params->tls_creds = g_strdup("");
3912 
3913     /* Set has_* up only for parameter checks */
3914     params->has_compress_level = true;
3915     params->has_compress_threads = true;
3916     params->has_compress_wait_thread = true;
3917     params->has_decompress_threads = true;
3918     params->has_throttle_trigger_threshold = true;
3919     params->has_cpu_throttle_initial = true;
3920     params->has_cpu_throttle_increment = true;
3921     params->has_cpu_throttle_tailslow = true;
3922     params->has_max_bandwidth = true;
3923     params->has_downtime_limit = true;
3924     params->has_x_checkpoint_delay = true;
3925     params->has_block_incremental = true;
3926     params->has_multifd_channels = true;
3927     params->has_multifd_compression = true;
3928     params->has_multifd_zlib_level = true;
3929     params->has_multifd_zstd_level = true;
3930     params->has_xbzrle_cache_size = true;
3931     params->has_max_postcopy_bandwidth = true;
3932     params->has_max_cpu_throttle = true;
3933     params->has_announce_initial = true;
3934     params->has_announce_max = true;
3935     params->has_announce_rounds = true;
3936     params->has_announce_step = true;
3937 
3938     qemu_sem_init(&ms->postcopy_pause_sem, 0);
3939     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
3940     qemu_sem_init(&ms->rp_state.rp_sem, 0);
3941     qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
3942     qemu_sem_init(&ms->rate_limit_sem, 0);
3943     qemu_sem_init(&ms->wait_unplug_sem, 0);
3944     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
3945     qemu_mutex_init(&ms->qemu_file_lock);
3946 }
3947 
3948 /*
3949  * Return true if the checks pass, false otherwise. An error will be put
3950  * inside errp if provided.
3951  */
3952 static bool migration_object_check(MigrationState *ms, Error **errp)
3953 {
3954     /* Assuming all off */
3955     bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };
3956 
3957     if (!migrate_params_check(&ms->parameters, errp)) {
3958         return false;
3959     }
3960 
3961     return migrate_caps_check(old_caps, ms->capabilities, errp);
3962 }
3963 
3964 static const TypeInfo migration_type = {
3965     .name = TYPE_MIGRATION,
3966     /*
3967      * NOTE: TYPE_MIGRATION is not really a device, as the object is
3968      * not created using qdev_new(), it is not attached to the qdev
3969      * device tree, and it is never realized.
3970      *
3971      * TODO: Make this TYPE_OBJECT once QOM provides something like
3972      * TYPE_DEVICE's "-global" properties.
3973      */
3974     .parent = TYPE_DEVICE,
3975     .class_init = migration_class_init,
3976     .class_size = sizeof(MigrationClass),
3977     .instance_size = sizeof(MigrationState),
3978     .instance_init = migration_instance_init,
3979     .instance_finalize = migration_instance_finalize,
3980 };
3981 
3982 static void register_migration_types(void)
3983 {
3984     type_register_static(&migration_type);
3985 }
3986 
3987 type_init(register_migration_types);
3988