xref: /qemu/migration/migration.c (revision 84615a19)
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "ui/qemu-spice.h"

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

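/*
 * Worked example of the chunking arithmetic above (illustrative, not
 * part of the original file): BUFFER_DELAY of 100 ms gives
 * XFER_LIMIT_RATIO == 10 chunks per second, so a limit of
 * max_bandwidth / XFER_LIMIT_RATIO bytes per chunk adds up to
 * max_bandwidth bytes per second.  With MAX_THROTTLE (128 MiB/s):
 *
 *   (128 << 20) / (1000 / BUFFER_DELAY) == 13421772 bytes per 100 ms
 */
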
/* Time in milliseconds we are allowed to stop the source
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count; decompression is usually at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Default auto-converge CPU throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: no compression, 1: best speed, ... 20: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};

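/*
 * Sketch of the return-path wire format, as produced by
 * migrate_send_rp_message() below: every message is
 *
 *   be16 message_type    (one of mig_rp_message_type above)
 *   be16 len             (payload length in bytes)
 *   byte payload[len]    (message-specific data, e.g. a be32 seq
 *                         number for MIG_RP_MSG_PONG)
 */
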
/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                       /* Capability set size */
    MigrationCapability caps[];     /* Flexible array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {    \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ } \
    }

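/*
 * Hypothetical usage of INITIALIZE_MIGRATE_CAPS_SET() (the name
 * "example_caps" is illustrative, not used elsewhere in QEMU):
 *
 *   static const INITIALIZE_MIGRATE_CAPS_SET(example_caps,
 *       MIGRATION_CAPABILITY_XBZRLE,
 *       MIGRATION_CAPABILITY_MULTIFD);
 *
 * expands so that example_caps.size == 2, because the compound
 * literal (int []) { __VA_ARGS__ } counts the arguments.
 */
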
/* Background-snapshot compatibility check list */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID,
    MIGRATION_CAPABILITY_ZERO_COPY_SEND);

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need dynamic
   creation of migration objects. */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static bool migrate_allow_multi_channels = true;

void migrate_protocol_allow_multi_channels(bool allow)
{
    migrate_allow_multi_channels = allow;
}

bool migrate_multi_channels_is_allowed(void)
{
    return migrate_allow_multi_channels;
}

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Initialize the incoming migration object as well, whether or
     * not we'll end up using it.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be
     * waiting on a semaphore, so wake up the COLO thread before
     * migration shutdown.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the QEMU file got an error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RB
 *   len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), name len (1), rbname (up to 255) */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We track the last RAMBlock that we requested a page from.  Note
     * that we don't need locking because this function will only be
     * called within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

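/*
 * Resulting payload layout of the request built above:
 *
 *   MIG_RP_MSG_REQ_PAGES:    start (be64) | len (be32)
 *   MIG_RP_MSG_REQ_PAGES_ID: start (be64) | len (be32) |
 *                            rbname_len (u8) | rbname (up to 255 bytes)
 *
 * The _ID variant is only sent when the RAMBlock differs from the one
 * in the previous request (mis->last_rb).
 */
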
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need the
     * lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    migrate_protocol_allow_multi_channels(false); /* reset it anyway */
    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        migrate_protocol_allow_multi_channels(true);
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

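/*
 * Example URIs routed by qemu_start_incoming_migration() above
 * (addresses and values are illustrative):
 *
 *   tcp:0.0.0.0:4444         socket_start_incoming_migration()
 *   unix:/tmp/migrate.sock   socket_start_incoming_migration()
 *   vsock:3:4444             socket_start_incoming_migration()
 *   rdma:192.168.1.2:4444    rdma_start_incoming_migration() (CONFIG_RDMA)
 *   exec:cat /tmp/vm.dump    exec_start_incoming_migration()
 *   fd:42                    fd_start_incoming_migration()
 */
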
static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* We've got the COLO info and know whether we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
    }
    exit(EXIT_FAILURE);
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (multifd_load_setup(errp) != 0) {
        return false;
    }

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (the other
         * threads will keep waiting), so that we can receive commands
         * from the source now and answer them if needed.  The other
         * threads will be woken up later, once we are sure the source
         * is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

static bool migration_needs_multiple_sockets(void)
{
    return migrate_use_multifd() || migrate_postcopy_preempt();
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    bool start_migration;
    QEMUFile *f;

    if (!mis->from_src_file) {
        /* The first connection (multifd may have multiple) */
        f = qemu_file_new_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }

        /*
         * Common migration only needs one channel, so we can start
         * right away.  Some features need more than one channel, so
         * we wait.
         */
        start_migration = !migration_needs_multiple_sockets();
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_use_multifd()) {
            start_migration = multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            start_migration = postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (start_migration) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_use_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (the fault thread is still paused),
     * so it's OK even without taking the mutex.  However, the best way
     * is to take the lock before sending the message header and
     * release it after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL, **tail = &head;
    MigrationCapabilityStatus *caps;
    MigrationState *s = migrate_get_current();
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        caps = g_malloc0(sizeof(*caps));
        caps->capability = i;
        caps->state = s->enabled_capabilities[i];
        QAPI_LIST_APPEND(tail, caps);
    }

    return head;
}

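/*
 * Example QMP exchange for the query above (the capability names and
 * states shown are illustrative):
 *
 *   -> { "execute": "query-migrate-capabilities" }
 *   <- { "return": [ { "capability": "xbzrle",  "state": false },
 *                    { "capability": "multifd", "state": true },
 *                    ... ] }
 */
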
MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}

void qmp_client_migrate_info(const char *protocol, const char *hostname,
                             bool has_port, int64_t port,
                             bool has_tls_port, int64_t tls_port,
                             const char *cert_subject,
                             Error **errp)
{
    if (strcmp(protocol, "spice") == 0) {
        if (!qemu_using_spice(errp)) {
            return;
        }

        if (!has_port && !has_tls_port) {
            error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
            return;
        }

        if (qemu_spice.migrate_info(hostname,
                                    has_port ? port : -1,
                                    has_tls_port ? tls_port : -1,
                                    cert_subject)) {
            error_setg(errp, "Could not set up display for migration");
            return;
        }
        return;
    }

    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
}

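/*
 * Example QMP invocation of the handler above (hostname and port
 * values are illustrative):
 *
 *   -> { "execute": "client_migrate_info",
 *        "arguments": { "protocol": "spice",
 *                       "hostname": "dest.example.com",
 *                       "port": 5901, "tls-port": 5902 } }
 *   <- { "return": {} }
 */
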
AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;

    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;

    }
}

static bool migrate_show_downtime(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;

    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
    }

    if (migrate_show_downtime(s)) {
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = stat64_get(&ram_atomic_counters.transferred);
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate);
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = stat64_get(&ram_atomic_counters.normal);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->dirty_sync_missed_zero_copy =
            ram_counters.dirty_sync_missed_zero_copy;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = ram_counters.precopy_bytes;
    info->ram->downtime_bytes = ram_counters.downtime_bytes;
    info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);

    if (migrate_use_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
                                    compression_counters.compressed_size;
        info->compression->compression_rate =
                                    compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two kinds of reasons a migration might be blocked:
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;
}

typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
    WT_SUPPORT_AVAILABLE,
    WT_SUPPORT_COMPATIBLE
} WriteTrackingSupport;

static
WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* Check if kernel supports required UFFD features */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }
    /*
     * Check if current memory configuration is
     * compatible with required UFFD features.
     */
    if (!ram_write_tracking_compatible()) {
        return WT_SUPPORT_AVAILABLE;
    }

    return WT_SUPPORT_COMPATIBLE;
}

/**
 * @migration_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /*
         * This check is reasonably expensive, so only do it when the
         * capability is set for the first time; also, only the
         * destination needs the special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
        WriteTrackingSupport wt_support;
        int idx;
        /*
         * Check if 'background-snapshot' capability is supported by
         * host kernel and compatible with guest memory configuration.
         */
        wt_support = migrate_query_write_tracking();
        if (wt_support < WT_SUPPORT_AVAILABLE) {
            error_setg(errp, "Background-snapshot is not supported by host kernel");
            return false;
        }
        if (wt_support < WT_SUPPORT_COMPATIBLE) {
            error_setg(errp, "Background-snapshot is not compatible "
                    "with guest memory configuration");
            return false;
        }

        /*
         * Check if there are any migration capabilities
         * incompatible with 'background-snapshot'.
         */
        for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
            int incomp_cap = check_caps_background_snapshot.caps[idx];
            if (cap_list[incomp_cap]) {
                error_setg(errp,
                        "Background-snapshot is not compatible with %s",
                        MigrationCapability_str(incomp_cap));
                return false;
            }
        }
    }

#ifdef CONFIG_LINUX
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
        (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
         cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
         cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
         migrate_multifd_compression() ||
         migrate_use_tls())) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#else
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
        error_setg(errp,
                   "Zero copy currently only available on Linux");
        return false;
    }
#endif


    /* incoming side only */
    if (runstate_check(RUN_STATE_INMIGRATE) &&
        !migrate_multi_channels_is_allowed() &&
        cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        error_setg(errp, "multifd is not supported by current protocol");
        return false;
    }

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
        if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
            error_setg(errp, "Postcopy preempt requires postcopy-ram");
            return false;
        }

        /*
         * Preempt mode requires urgent pages to be sent in a separate
         * channel; compression, on the other hand, scatters pages
         * across different compression channels, which is incompatible
         * with the preempt assumptions on channel assignments.
         */
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Postcopy preempt not compatible with compress");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Multifd is not compatible with compress");
            return false;
        }
    }

    return true;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

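/*
 * Example QMP exchange for query-migrate (field values are
 * illustrative; the exact fields depend on the migration state):
 *
 *   -> { "execute": "query-migrate" }
 *   <- { "return": { "status": "active",
 *                    "setup-time": 10, "total-time": 12345,
 *                    "expected-downtime": 300,
 *                    "ram": { "transferred": 123456, "remaining": 123,
 *                             "total": 1073741824, ... } } }
 */
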
void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
1458 
1459 /*
1460  * Check whether the parameters are valid. Error will be put into errp
1461  * (if provided). Return true if valid, otherwise false.
1462  */
1463 static bool migrate_params_check(MigrationParameters *params, Error **errp)
1464 {
1465     if (params->has_compress_level &&
1466         (params->compress_level > 9)) {
1467         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
1468                    "a value between 0 and 9");
1469         return false;
1470     }
1471 
1472     if (params->has_compress_threads && (params->compress_threads < 1)) {
1473         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1474                    "compress_threads",
1475                    "a value between 1 and 255");
1476         return false;
1477     }
1478 
1479     if (params->has_decompress_threads && (params->decompress_threads < 1)) {
1480         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1481                    "decompress_threads",
1482                    "a value between 1 and 255");
1483         return false;
1484     }
1485 
1486     if (params->has_throttle_trigger_threshold &&
1487         (params->throttle_trigger_threshold < 1 ||
1488          params->throttle_trigger_threshold > 100)) {
1489         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1490                    "throttle_trigger_threshold",
1491                    "an integer in the range of 1 to 100");
1492         return false;
1493     }
1494 
1495     if (params->has_cpu_throttle_initial &&
1496         (params->cpu_throttle_initial < 1 ||
1497          params->cpu_throttle_initial > 99)) {
1498         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1499                    "cpu_throttle_initial",
1500                    "an integer in the range of 1 to 99");
1501         return false;
1502     }
1503 
1504     if (params->has_cpu_throttle_increment &&
1505         (params->cpu_throttle_increment < 1 ||
1506          params->cpu_throttle_increment > 99)) {
1507         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1508                    "cpu_throttle_increment",
1509                    "an integer in the range of 1 to 99");
1510         return false;
1511     }
1512 
1513     if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
1514         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1515                    "max_bandwidth",
1516                    "an integer in the range of 0 to "stringify(SIZE_MAX)
1517                    " bytes/second");
1518         return false;
1519     }
1520 
1521     if (params->has_downtime_limit &&
1522         (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
1523         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1524                    "downtime_limit",
1525                    "an integer in the range of 0 to "
1526                     stringify(MAX_MIGRATE_DOWNTIME)" ms");
1527         return false;
1528     }
1529 
1530     /* x_checkpoint_delay is now always positive */
1531 
1532     if (params->has_multifd_channels && (params->multifd_channels < 1)) {
1533         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1534                    "multifd_channels",
1535                    "a value between 1 and 255");
1536         return false;
1537     }
1538 
1539     if (params->has_multifd_zlib_level &&
1540         (params->multifd_zlib_level > 9)) {
1541         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
1542                    "a value between 0 and 9");
1543         return false;
1544     }
1545 
1546     if (params->has_multifd_zstd_level &&
1547         (params->multifd_zstd_level > 20)) {
1548         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
1549                    "a value between 0 and 20");
1550         return false;
1551     }
1552 
1553     if (params->has_xbzrle_cache_size &&
1554         (params->xbzrle_cache_size < qemu_target_page_size() ||
1555          !is_power_of_2(params->xbzrle_cache_size))) {
1556         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1557                    "xbzrle_cache_size",
1558                    "a power of two no less than the target page size");
1559         return false;
1560     }
1561 
1562     if (params->has_max_cpu_throttle &&
1563         (params->max_cpu_throttle < params->cpu_throttle_initial ||
1564          params->max_cpu_throttle > 99)) {
1565         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1566                    "max_cpu_throttle",
1567                    "an integer in the range of cpu_throttle_initial to 99");
1568         return false;
1569     }
1570 
1571     if (params->has_announce_initial &&
1572         params->announce_initial > 100000) {
1573         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1574                    "announce_initial",
1575                    "a value between 0 and 100000");
1576         return false;
1577     }
1578     if (params->has_announce_max &&
1579         params->announce_max > 100000) {
1580         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1581                    "announce_max",
1582                    "a value between 0 and 100000");
1583         return false;
1584     }
1585     if (params->has_announce_rounds &&
1586         params->announce_rounds > 1000) {
1587         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1588                    "announce_rounds",
1589                    "a value between 0 and 1000");
1590         return false;
1591     }
1592     if (params->has_announce_step &&
1593         (params->announce_step < 1 ||
1594          params->announce_step > 10000)) {
1595         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1596                    "announce_step",
1597                    "a value between 1 and 10000");
1598         return false;
1599     }
1600 
1601     if (params->has_block_bitmap_mapping &&
1602         !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
1603         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
1604         return false;
1605     }
1606 
1607 #ifdef CONFIG_LINUX
1608     if (migrate_use_zero_copy_send() &&
1609         ((params->has_multifd_compression && params->multifd_compression) ||
1610          (params->tls_creds && *params->tls_creds))) {
1611         error_setg(errp,
1612                    "Zero copy only available for non-compressed non-TLS multifd migration");
1613         return false;
1614     }
1615 #endif
1616 
1617     return true;
1618 }
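
/*
 * Minimal check sketch (editorial example; the helper name is
 * hypothetical): migrate_params_check() validates a candidate
 * parameter set without touching global state, so an out-of-range
 * value is refused with errp set and nothing applied.
 */
static inline bool example_compress_level_rejected(void)
{
    MigrationParameters p = {
        .has_compress_level = true,
        .compress_level = 10,           /* valid range is 0..9 */
    };
    Error *err = NULL;
    bool ok = migrate_params_check(&p, &err);

    error_free(err);
    return !ok;                         /* true: the check refused it */
}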
1619 
1620 static void migrate_params_test_apply(MigrateSetParameters *params,
1621                                       MigrationParameters *dest)
1622 {
1623     *dest = migrate_get_current()->parameters;
1624 
1625     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1626 
1627     if (params->has_compress_level) {
1628         dest->compress_level = params->compress_level;
1629     }
1630 
1631     if (params->has_compress_threads) {
1632         dest->compress_threads = params->compress_threads;
1633     }
1634 
1635     if (params->has_compress_wait_thread) {
1636         dest->compress_wait_thread = params->compress_wait_thread;
1637     }
1638 
1639     if (params->has_decompress_threads) {
1640         dest->decompress_threads = params->decompress_threads;
1641     }
1642 
1643     if (params->has_throttle_trigger_threshold) {
1644         dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
1645     }
1646 
1647     if (params->has_cpu_throttle_initial) {
1648         dest->cpu_throttle_initial = params->cpu_throttle_initial;
1649     }
1650 
1651     if (params->has_cpu_throttle_increment) {
1652         dest->cpu_throttle_increment = params->cpu_throttle_increment;
1653     }
1654 
1655     if (params->has_cpu_throttle_tailslow) {
1656         dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
1657     }
1658 
1659     if (params->tls_creds) {
1660         assert(params->tls_creds->type == QTYPE_QSTRING);
1661         dest->tls_creds = params->tls_creds->u.s;
1662     }
1663 
1664     if (params->tls_hostname) {
1665         assert(params->tls_hostname->type == QTYPE_QSTRING);
1666         dest->tls_hostname = params->tls_hostname->u.s;
1667     }
1668 
1669     if (params->has_max_bandwidth) {
1670         dest->max_bandwidth = params->max_bandwidth;
1671     }
1672 
1673     if (params->has_downtime_limit) {
1674         dest->downtime_limit = params->downtime_limit;
1675     }
1676 
1677     if (params->has_x_checkpoint_delay) {
1678         dest->x_checkpoint_delay = params->x_checkpoint_delay;
1679     }
1680 
1681     if (params->has_block_incremental) {
1682         dest->block_incremental = params->block_incremental;
1683     }
1684     if (params->has_multifd_channels) {
1685         dest->multifd_channels = params->multifd_channels;
1686     }
1687     if (params->has_multifd_compression) {
1688         dest->multifd_compression = params->multifd_compression;
1689     }
1690     if (params->has_xbzrle_cache_size) {
1691         dest->xbzrle_cache_size = params->xbzrle_cache_size;
1692     }
1693     if (params->has_max_postcopy_bandwidth) {
1694         dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1695     }
1696     if (params->has_max_cpu_throttle) {
1697         dest->max_cpu_throttle = params->max_cpu_throttle;
1698     }
1699     if (params->has_announce_initial) {
1700         dest->announce_initial = params->announce_initial;
1701     }
1702     if (params->has_announce_max) {
1703         dest->announce_max = params->announce_max;
1704     }
1705     if (params->has_announce_rounds) {
1706         dest->announce_rounds = params->announce_rounds;
1707     }
1708     if (params->has_announce_step) {
1709         dest->announce_step = params->announce_step;
1710     }
1711 
1712     if (params->has_block_bitmap_mapping) {
1713         dest->has_block_bitmap_mapping = true;
1714         dest->block_bitmap_mapping = params->block_bitmap_mapping;
1715     }
1716 }
1717 
1718 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
1719 {
1720     MigrationState *s = migrate_get_current();
1721 
1722     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1723 
1724     if (params->has_compress_level) {
1725         s->parameters.compress_level = params->compress_level;
1726     }
1727 
1728     if (params->has_compress_threads) {
1729         s->parameters.compress_threads = params->compress_threads;
1730     }
1731 
1732     if (params->has_compress_wait_thread) {
1733         s->parameters.compress_wait_thread = params->compress_wait_thread;
1734     }
1735 
1736     if (params->has_decompress_threads) {
1737         s->parameters.decompress_threads = params->decompress_threads;
1738     }
1739 
1740     if (params->has_throttle_trigger_threshold) {
1741         s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
1742     }
1743 
1744     if (params->has_cpu_throttle_initial) {
1745         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
1746     }
1747 
1748     if (params->has_cpu_throttle_increment) {
1749         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
1750     }
1751 
1752     if (params->has_cpu_throttle_tailslow) {
1753         s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
1754     }
1755 
1756     if (params->tls_creds) {
1757         g_free(s->parameters.tls_creds);
1758         assert(params->tls_creds->type == QTYPE_QSTRING);
1759         s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
1760     }
1761 
1762     if (params->tls_hostname) {
1763         g_free(s->parameters.tls_hostname);
1764         assert(params->tls_hostname->type == QTYPE_QSTRING);
1765         s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
1766     }
1767 
1768     if (params->tls_authz) {
1769         g_free(s->parameters.tls_authz);
1770         assert(params->tls_authz->type == QTYPE_QSTRING);
1771         s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
1772     }
1773 
1774     if (params->has_max_bandwidth) {
1775         s->parameters.max_bandwidth = params->max_bandwidth;
1776         if (s->to_dst_file && !migration_in_postcopy()) {
1777             qemu_file_set_rate_limit(s->to_dst_file,
1778                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
1779         }
1780     }
1781 
1782     if (params->has_downtime_limit) {
1783         s->parameters.downtime_limit = params->downtime_limit;
1784     }
1785 
1786     if (params->has_x_checkpoint_delay) {
1787         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
1788         if (migration_in_colo_state()) {
1789             colo_checkpoint_notify(s);
1790         }
1791     }
1792 
1793     if (params->has_block_incremental) {
1794         s->parameters.block_incremental = params->block_incremental;
1795     }
1796     if (params->has_multifd_channels) {
1797         s->parameters.multifd_channels = params->multifd_channels;
1798     }
1799     if (params->has_multifd_compression) {
1800         s->parameters.multifd_compression = params->multifd_compression;
1801     }
1802     if (params->has_xbzrle_cache_size) {
1803         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
1804         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
1805     }
1806     if (params->has_max_postcopy_bandwidth) {
1807         s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1808         if (s->to_dst_file && migration_in_postcopy()) {
1809             qemu_file_set_rate_limit(s->to_dst_file,
1810                     s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
1811         }
1812     }
1813     if (params->has_max_cpu_throttle) {
1814         s->parameters.max_cpu_throttle = params->max_cpu_throttle;
1815     }
1816     if (params->has_announce_initial) {
1817         s->parameters.announce_initial = params->announce_initial;
1818     }
1819     if (params->has_announce_max) {
1820         s->parameters.announce_max = params->announce_max;
1821     }
1822     if (params->has_announce_rounds) {
1823         s->parameters.announce_rounds = params->announce_rounds;
1824     }
1825     if (params->has_announce_step) {
1826         s->parameters.announce_step = params->announce_step;
1827     }
1828 
1829     if (params->has_block_bitmap_mapping) {
1830         qapi_free_BitmapMigrationNodeAliasList(
1831             s->parameters.block_bitmap_mapping);
1832 
1833         s->parameters.has_block_bitmap_mapping = true;
1834         s->parameters.block_bitmap_mapping =
1835             QAPI_CLONE(BitmapMigrationNodeAliasList,
1836                        params->block_bitmap_mapping);
1837     }
1838 }
1839 
1840 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1841 {
1842     MigrationParameters tmp;
1843 
1844     /* TODO Rewrite "" to null instead */
1845     if (params->tls_creds
1846         && params->tls_creds->type == QTYPE_QNULL) {
1847         qobject_unref(params->tls_creds->u.n);
1848         params->tls_creds->type = QTYPE_QSTRING;
1849         params->tls_creds->u.s = strdup("");
1850     }
1851     /* TODO Rewrite "" to null instead */
1852     if (params->tls_hostname
1853         && params->tls_hostname->type == QTYPE_QNULL) {
1854         qobject_unref(params->tls_hostname->u.n);
1855         params->tls_hostname->type = QTYPE_QSTRING;
1856         params->tls_hostname->u.s = strdup("");
1857     }
1858 
1859     migrate_params_test_apply(params, &tmp);
1860 
1861     if (!migrate_params_check(&tmp, errp)) {
1862         /* Invalid parameter */
1863         return;
1864     }
1865 
1866     migrate_params_apply(params, errp);
1867 }
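
/*
 * Caller sketch (editorial example; the helper name is hypothetical):
 * setting a single parameter through the validated test-apply/apply
 * path above.
 */
static inline void example_set_downtime_limit(int64_t limit_ms, Error **errp)
{
    MigrateSetParameters p = {
        .has_downtime_limit = true,
        .downtime_limit = limit_ms,     /* checked against MAX_MIGRATE_DOWNTIME */
    };

    qmp_migrate_set_parameters(&p, errp);
}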
1868 
1869 
1870 void qmp_migrate_start_postcopy(Error **errp)
1871 {
1872     MigrationState *s = migrate_get_current();
1873 
1874     if (!migrate_postcopy()) {
1875         error_setg(errp, "Enable postcopy with migrate_set_capability before"
1876                          " the start of migration");
1877         return;
1878     }
1879 
1880     if (s->state == MIGRATION_STATUS_NONE) {
1881         error_setg(errp, "Postcopy must be started after migration has been"
1882                          " started");
1883         return;
1884     }
1885     /*
1886      * We don't error if migration has finished, since that would be racy
1887      * with issuing this command.
1888      */
1889     qatomic_set(&s->start_postcopy, true);
1890 }
1891 
1892 /* shared migration helpers */
1893 
1894 void migrate_set_state(int *state, int old_state, int new_state)
1895 {
1896     assert(new_state < MIGRATION_STATUS__MAX);
1897     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
1898         trace_migrate_set_state(MigrationStatus_str(new_state));
1899         migrate_generate_event(new_state);
1900     }
1901 }
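
/*
 * Semantics sketch (editorial example; the helper name is
 * hypothetical): migrate_set_state() is a compare-and-swap, so the
 * transition only fires if the state still equals old_state; racing
 * callers cannot apply it twice.
 */
static inline void example_cancel_if_still_setup(MigrationState *s)
{
    /* No-op unless the migration is still in SETUP */
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_CANCELLING);
}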
1902 
1903 static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
1904                                                   bool state)
1905 {
1906     MigrationCapabilityStatus *cap;
1907 
1908     cap = g_new0(MigrationCapabilityStatus, 1);
1909     cap->capability = index;
1910     cap->state = state;
1911 
1912     return cap;
1913 }
1914 
1915 void migrate_set_block_enabled(bool value, Error **errp)
1916 {
1917     MigrationCapabilityStatusList *cap = NULL;
1918 
1919     QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
1920     qmp_migrate_set_capabilities(cap, errp);
1921     qapi_free_MigrationCapabilityStatusList(cap);
1922 }
1923 
1924 static void migrate_set_block_incremental(MigrationState *s, bool value)
1925 {
1926     s->parameters.block_incremental = value;
1927 }
1928 
1929 static void block_cleanup_parameters(MigrationState *s)
1930 {
1931     if (s->must_remove_block_options) {
1932         /* setting to false can never fail */
1933         migrate_set_block_enabled(false, &error_abort);
1934         migrate_set_block_incremental(s, false);
1935         s->must_remove_block_options = false;
1936     }
1937 }
1938 
1939 static void migrate_fd_cleanup(MigrationState *s)
1940 {
1941     qemu_bh_delete(s->cleanup_bh);
1942     s->cleanup_bh = NULL;
1943 
1944     g_free(s->hostname);
1945     s->hostname = NULL;
1946     json_writer_free(s->vmdesc);
1947     s->vmdesc = NULL;
1948 
1949     qemu_savevm_state_cleanup();
1950 
1951     if (s->to_dst_file) {
1952         QEMUFile *tmp;
1953 
1954         trace_migrate_fd_cleanup();
1955         qemu_mutex_unlock_iothread();
1956         if (s->migration_thread_running) {
1957             qemu_thread_join(&s->thread);
1958             s->migration_thread_running = false;
1959         }
1960         qemu_mutex_lock_iothread();
1961 
1962         multifd_save_cleanup();
1963         qemu_mutex_lock(&s->qemu_file_lock);
1964         tmp = s->to_dst_file;
1965         s->to_dst_file = NULL;
1966         qemu_mutex_unlock(&s->qemu_file_lock);
1967         /*
1968          * Close the file handle without the lock to make sure the
1969          * critical section won't block for long.
1970          */
1971         migration_ioc_unregister_yank_from_file(tmp);
1972         qemu_fclose(tmp);
1973     }
1974 
1975     if (s->postcopy_qemufile_src) {
1976         migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
1977         qemu_fclose(s->postcopy_qemufile_src);
1978         s->postcopy_qemufile_src = NULL;
1979     }
1980 
1981     assert(!migration_is_active(s));
1982 
1983     if (s->state == MIGRATION_STATUS_CANCELLING) {
1984         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
1985                           MIGRATION_STATUS_CANCELLED);
1986     }
1987 
1988     if (s->error) {
1989         /* It is used by "info migrate"; we can't free it */
1990         error_report_err(error_copy(s->error));
1991     }
1992     notifier_list_notify(&migration_state_notifiers, s);
1993     block_cleanup_parameters(s);
1994     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
1995 }
1996 
1997 static void migrate_fd_cleanup_schedule(MigrationState *s)
1998 {
1999     /*
2000      * Take a ref on the state for the bh, because it may run when
2001      * there are already no other refs
2002      */
2003     object_ref(OBJECT(s));
2004     qemu_bh_schedule(s->cleanup_bh);
2005 }
2006 
2007 static void migrate_fd_cleanup_bh(void *opaque)
2008 {
2009     MigrationState *s = opaque;
2010     migrate_fd_cleanup(s);
2011     object_unref(OBJECT(s));
2012 }
2013 
2014 void migrate_set_error(MigrationState *s, const Error *error)
2015 {
2016     QEMU_LOCK_GUARD(&s->error_mutex);
2017     if (!s->error) {
2018         s->error = error_copy(error);
2019     }
2020 }
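
/*
 * Ownership sketch (editorial example; the helper name and message
 * are hypothetical): migrate_set_error() copies the error and keeps
 * only the first one recorded, so the caller still owns, and must
 * free, its own Error.
 */
static inline void example_record_failure(MigrationState *s)
{
    Error *err = NULL;

    error_setg(&err, "example failure"); /* hypothetical message */
    migrate_set_error(s, err);           /* copied only if none set yet */
    error_free(err);
}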
2021 
2022 static void migrate_error_free(MigrationState *s)
2023 {
2024     QEMU_LOCK_GUARD(&s->error_mutex);
2025     if (s->error) {
2026         error_free(s->error);
2027         s->error = NULL;
2028     }
2029 }
2030 
2031 void migrate_fd_error(MigrationState *s, const Error *error)
2032 {
2033     trace_migrate_fd_error(error_get_pretty(error));
2034     assert(s->to_dst_file == NULL);
2035     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2036                       MIGRATION_STATUS_FAILED);
2037     migrate_set_error(s, error);
2038 }
2039 
2040 static void migrate_fd_cancel(MigrationState *s)
2041 {
2042     int old_state;
2043     QEMUFile *f = migrate_get_current()->to_dst_file;
2044     trace_migrate_fd_cancel();
2045 
2046     WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
2047         if (s->rp_state.from_dst_file) {
2048             /* shut down the rp socket, causing the rp thread to exit */
2049             qemu_file_shutdown(s->rp_state.from_dst_file);
2050         }
2051     }
2052 
2053     do {
2054         old_state = s->state;
2055         if (!migration_is_running(old_state)) {
2056             break;
2057         }
2058         /* If the migration is paused, kick it out of the pause */
2059         if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
2060             qemu_sem_post(&s->pause_sem);
2061         }
2062         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
2063     } while (s->state != MIGRATION_STATUS_CANCELLING);
2064 
2065     /*
2066      * If we're unlucky the migration code might be stuck somewhere in a
2067      * send/write while the network has failed and is waiting to time out;
2068      * if we've got shutdown(2) available then we can force it to quit.
2069      * The outgoing qemu file gets closed in migrate_fd_cleanup that is
2070      * called in a bh, so there is no race against this cancel.
2071      */
2072     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
2073         qemu_file_shutdown(f);
2074     }
2075     if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
2076         Error *local_err = NULL;
2077 
2078         bdrv_activate_all(&local_err);
2079         if (local_err) {
2080             error_report_err(local_err);
2081         } else {
2082             s->block_inactive = false;
2083         }
2084     }
2085 }
2086 
2087 void add_migration_state_change_notifier(Notifier *notify)
2088 {
2089     notifier_list_add(&migration_state_notifiers, notify);
2090 }
2091 
2092 void remove_migration_state_change_notifier(Notifier *notify)
2093 {
2094     notifier_remove(notify);
2095 }
2096 
2097 bool migration_in_setup(MigrationState *s)
2098 {
2099     return s->state == MIGRATION_STATUS_SETUP;
2100 }
2101 
2102 bool migration_has_finished(MigrationState *s)
2103 {
2104     return s->state == MIGRATION_STATUS_COMPLETED;
2105 }
2106 
2107 bool migration_has_failed(MigrationState *s)
2108 {
2109     return (s->state == MIGRATION_STATUS_CANCELLED ||
2110             s->state == MIGRATION_STATUS_FAILED);
2111 }
2112 
2113 bool migration_in_postcopy(void)
2114 {
2115     MigrationState *s = migrate_get_current();
2116 
2117     switch (s->state) {
2118     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
2119     case MIGRATION_STATUS_POSTCOPY_PAUSED:
2120     case MIGRATION_STATUS_POSTCOPY_RECOVER:
2121         return true;
2122     default:
2123         return false;
2124     }
2125 }
2126 
2127 bool migration_in_postcopy_after_devices(MigrationState *s)
2128 {
2129     return migration_in_postcopy() && s->postcopy_after_devices;
2130 }
2131 
2132 bool migration_in_incoming_postcopy(void)
2133 {
2134     PostcopyState ps = postcopy_state_get();
2135 
2136     return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
2137 }
2138 
2139 bool migration_incoming_postcopy_advised(void)
2140 {
2141     PostcopyState ps = postcopy_state_get();
2142 
2143     return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2144 }
2145 
2146 bool migration_in_bg_snapshot(void)
2147 {
2148     MigrationState *s = migrate_get_current();
2149 
2150     return migrate_background_snapshot() &&
2151             migration_is_setup_or_active(s->state);
2152 }
2153 
2154 bool migration_is_idle(void)
2155 {
2156     MigrationState *s = current_migration;
2157 
2158     if (!s) {
2159         return true;
2160     }
2161 
2162     switch (s->state) {
2163     case MIGRATION_STATUS_NONE:
2164     case MIGRATION_STATUS_CANCELLED:
2165     case MIGRATION_STATUS_COMPLETED:
2166     case MIGRATION_STATUS_FAILED:
2167         return true;
2168     case MIGRATION_STATUS_SETUP:
2169     case MIGRATION_STATUS_CANCELLING:
2170     case MIGRATION_STATUS_ACTIVE:
2171     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
2172     case MIGRATION_STATUS_COLO:
2173     case MIGRATION_STATUS_PRE_SWITCHOVER:
2174     case MIGRATION_STATUS_DEVICE:
2175     case MIGRATION_STATUS_WAIT_UNPLUG:
2176         return false;
2177     case MIGRATION_STATUS__MAX:
2178         g_assert_not_reached();
2179     }
2180 
2181     return false;
2182 }
2183 
2184 bool migration_is_active(MigrationState *s)
2185 {
2186     return (s->state == MIGRATION_STATUS_ACTIVE ||
2187             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
2188 }
2189 
2190 void migrate_init(MigrationState *s)
2191 {
2192     /*
2193      * Reinitialise all migration state, except
2194      * parameters/capabilities that the user set, and
2195      * locks.
2196      */
2197     s->cleanup_bh = 0;
2198     s->vm_start_bh = 0;
2199     s->to_dst_file = NULL;
2200     s->state = MIGRATION_STATUS_NONE;
2201     s->rp_state.from_dst_file = NULL;
2202     s->rp_state.error = false;
2203     s->mbps = 0.0;
2204     s->pages_per_second = 0.0;
2205     s->downtime = 0;
2206     s->expected_downtime = 0;
2207     s->setup_time = 0;
2208     s->start_postcopy = false;
2209     s->postcopy_after_devices = false;
2210     s->migration_thread_running = false;
2211     error_free(s->error);
2212     s->error = NULL;
2213     s->hostname = NULL;
2214 
2215     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
2216 
2217     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2218     s->total_time = 0;
2219     s->vm_was_running = false;
2220     s->iteration_initial_bytes = 0;
2221     s->threshold_size = 0;
2222 }
2223 
2224 int migrate_add_blocker_internal(Error *reason, Error **errp)
2225 {
2226     /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
2227     if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
2228         error_propagate_prepend(errp, error_copy(reason),
2229                                 "disallowing migration blocker "
2230                                 "(migration/snapshot in progress) for: ");
2231         return -EBUSY;
2232     }
2233 
2234     migration_blockers = g_slist_prepend(migration_blockers, reason);
2235     return 0;
2236 }
2237 
2238 int migrate_add_blocker(Error *reason, Error **errp)
2239 {
2240     if (only_migratable) {
2241         error_propagate_prepend(errp, error_copy(reason),
2242                                 "disallowing migration blocker "
2243                                 "(--only-migratable) for: ");
2244         return -EACCES;
2245     }
2246 
2247     return migrate_add_blocker_internal(reason, errp);
2248 }
2249 
2250 void migrate_del_blocker(Error *reason)
2251 {
2252     migration_blockers = g_slist_remove(migration_blockers, reason);
2253 }
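
/*
 * Blocker lifecycle sketch (editorial example; the device and helper
 * are hypothetical): register a blocker while a non-migratable
 * resource is in use, and pair it with migrate_del_blocker() plus
 * error_free() on teardown.
 */
static inline int example_add_device_blocker(Error **blocker, Error **errp)
{
    error_setg(blocker, "example-device does not support migration");
    if (migrate_add_blocker(*blocker, errp) < 0) {
        error_free(*blocker);
        *blocker = NULL;
        return -1;                      /* -EBUSY or -EACCES was reported */
    }
    return 0;
}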
2254 
2255 void qmp_migrate_incoming(const char *uri, Error **errp)
2256 {
2257     Error *local_err = NULL;
2258     static bool once = true;
2259 
2260     if (!once) {
2261         error_setg(errp, "The incoming migration has already been started");
2262         return;
2263     }
2264     if (!runstate_check(RUN_STATE_INMIGRATE)) {
2265         error_setg(errp, "'-incoming' was not specified on the command line");
2266         return;
2267     }
2268 
2269     if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2270         return;
2271     }
2272 
2273     qemu_start_incoming_migration(uri, &local_err);
2274 
2275     if (local_err) {
2276         yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2277         error_propagate(errp, local_err);
2278         return;
2279     }
2280 
2281     once = false;
2282 }
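
/*
 * Usage sketch (editorial example; the helper name is hypothetical):
 * on a destination started with '-incoming defer', a management
 * client later supplies the listen URI.  The address is a
 * placeholder.
 */
static inline void example_start_deferred_incoming(Error **errp)
{
    qmp_migrate_incoming("tcp:0.0.0.0:4444", errp);
}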
2283 
2284 void qmp_migrate_recover(const char *uri, Error **errp)
2285 {
2286     MigrationIncomingState *mis = migration_incoming_get_current();
2287 
2288     /*
2289      * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
2290      * callers (no one should ignore a recover failure); if it is not set,
2291      * that is a programming error.
2292      */
2293     assert(errp);
2294 
2295     if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2296         error_setg(errp, "Migrate recover can only be run "
2297                    "when postcopy is paused.");
2298         return;
2299     }
2300 
2301     /* If there's an existing transport, release it */
2302     migration_incoming_transport_cleanup(mis);
2303 
2304     /*
2305      * Note that this call will never start a real migration; it will
2306      * only re-establish the migration stream and poke the existing
2307      * migration to continue using the newly established channel.
2308      */
2309     qemu_start_incoming_migration(uri, errp);
2310 }
2311 
2312 void qmp_migrate_pause(Error **errp)
2313 {
2314     MigrationState *ms = migrate_get_current();
2315     MigrationIncomingState *mis = migration_incoming_get_current();
2316     int ret;
2317 
2318     if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2319         /* Source side, during postcopy */
2320         qemu_mutex_lock(&ms->qemu_file_lock);
2321         ret = qemu_file_shutdown(ms->to_dst_file);
2322         qemu_mutex_unlock(&ms->qemu_file_lock);
2323         if (ret) {
2324             error_setg(errp, "Failed to pause source migration");
2325         }
2326         return;
2327     }
2328 
2329     if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2330         ret = qemu_file_shutdown(mis->from_src_file);
2331         if (ret) {
2332             error_setg(errp, "Failed to pause destination migration");
2333         }
2334         return;
2335     }
2336 
2337     error_setg(errp, "migrate-pause is currently only supported "
2338                "during postcopy-active state");
2339 }
2340 
2341 bool migration_is_blocked(Error **errp)
2342 {
2343     if (qemu_savevm_state_blocked(errp)) {
2344         return true;
2345     }
2346 
2347     if (migration_blockers) {
2348         error_propagate(errp, error_copy(migration_blockers->data));
2349         return true;
2350     }
2351 
2352     return false;
2353 }
2354 
2355 /* Returns true if continue to migrate, or false if error detected */
2356 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
2357                             bool resume, Error **errp)
2358 {
2359     Error *local_err = NULL;
2360 
2361     if (resume) {
2362         if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2363             error_setg(errp, "Cannot resume if there is no "
2364                        "paused migration");
2365             return false;
2366         }
2367 
2368         /*
2369          * Postcopy recovery won't work well with release-ram
2370          * capability, since release-ram drops the page buffer as soon
2371          * as the page is put into the send buffer.  So if a network
2372          * failure happens, any page buffers that have not yet reached
2373          * the destination VM but have already been sent from the
2374          * source VM will be lost forever.  Refuse to let the client
2375          * resume such a postcopy migration.
2376          * Luckily release-ram was designed to only be used when src
2377          * and destination VMs are on the same host, so it should be
2378          * fine.
2379          */
2380         if (migrate_release_ram()) {
2381             error_setg(errp, "Postcopy recovery cannot work "
2382                        "when release-ram capability is set");
2383             return false;
2384         }
2385 
2386         /* This is a resume, skip init status */
2387         return true;
2388     }
2389 
2390     if (migration_is_running(s->state)) {
2391         error_setg(errp, QERR_MIGRATION_ACTIVE);
2392         return false;
2393     }
2394 
2395     if (runstate_check(RUN_STATE_INMIGRATE)) {
2396         error_setg(errp, "Guest is waiting for an incoming migration");
2397         return false;
2398     }
2399 
2400     if (runstate_check(RUN_STATE_POSTMIGRATE)) {
2401         error_setg(errp, "Can't migrate the vm that was paused due to "
2402                    "previous migration");
2403         return false;
2404     }
2405 
2406     if (migration_is_blocked(errp)) {
2407         return false;
2408     }
2409 
2410     if (blk || blk_inc) {
2411         if (migrate_colo_enabled()) {
2412             error_setg(errp, "No disk migration is required in COLO mode");
2413             return false;
2414         }
2415         if (migrate_use_block() || migrate_use_block_incremental()) {
2416             error_setg(errp, "Command options are incompatible with "
2417                        "current migration capabilities");
2418             return false;
2419         }
2420         migrate_set_block_enabled(true, &local_err);
2421         if (local_err) {
2422             error_propagate(errp, local_err);
2423             return false;
2424         }
2425         s->must_remove_block_options = true;
2426     }
2427 
2428     if (blk_inc) {
2429         migrate_set_block_incremental(s, true);
2430     }
2431 
2432     migrate_init(s);
2433     /*
2434      * set ram_counters and compression_counters memory to zero for a
2435      * new migration
2436      */
2437     memset(&ram_counters, 0, sizeof(ram_counters));
2438     memset(&compression_counters, 0, sizeof(compression_counters));
2439 
2440     return true;
2441 }
2442 
2443 void qmp_migrate(const char *uri, bool has_blk, bool blk,
2444                  bool has_inc, bool inc, bool has_detach, bool detach,
2445                  bool has_resume, bool resume, Error **errp)
2446 {
2447     Error *local_err = NULL;
2448     MigrationState *s = migrate_get_current();
2449     const char *p = NULL;
2450 
2451     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
2452                          has_resume && resume, errp)) {
2453         /* Error detected, put into errp */
2454         return;
2455     }
2456 
2457     if (!(has_resume && resume)) {
2458         if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2459             return;
2460         }
2461     }
2462 
2463     migrate_protocol_allow_multi_channels(false);
2464     if (strstart(uri, "tcp:", &p) ||
2465         strstart(uri, "unix:", NULL) ||
2466         strstart(uri, "vsock:", NULL)) {
2467         migrate_protocol_allow_multi_channels(true);
2468         socket_start_outgoing_migration(s, p ? p : uri, &local_err);
2469 #ifdef CONFIG_RDMA
2470     } else if (strstart(uri, "rdma:", &p)) {
2471         rdma_start_outgoing_migration(s, p, &local_err);
2472 #endif
2473     } else if (strstart(uri, "exec:", &p)) {
2474         exec_start_outgoing_migration(s, p, &local_err);
2475     } else if (strstart(uri, "fd:", &p)) {
2476         fd_start_outgoing_migration(s, p, &local_err);
2477     } else {
2478         if (!(has_resume && resume)) {
2479             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2480         }
2481         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
2482                    "a valid migration protocol");
2483         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2484                           MIGRATION_STATUS_FAILED);
2485         block_cleanup_parameters(s);
2486         return;
2487     }
2488 
2489     if (local_err) {
2490         if (!(has_resume && resume)) {
2491             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2492         }
2493         migrate_fd_error(s, local_err);
2494         error_propagate(errp, local_err);
2495         return;
2496     }
2497 }
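
/*
 * Usage sketch (editorial example; the helper name is hypothetical):
 * kicking off an outgoing TCP migration through the URI dispatch
 * above.  Host and port are placeholders.
 */
static inline void example_start_tcp_migration(Error **errp)
{
    qmp_migrate("tcp:203.0.113.1:4444",
                false, false,   /* blk */
                false, false,   /* inc */
                false, false,   /* detach */
                false, false,   /* resume */
                errp);
}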
2498 
2499 void qmp_migrate_cancel(Error **errp)
2500 {
2501     migration_cancel(NULL);
2502 }
2503 
2504 void qmp_migrate_continue(MigrationStatus state, Error **errp)
2505 {
2506     MigrationState *s = migrate_get_current();
2507     if (s->state != state) {
2508         error_setg(errp, "Migration not in expected state: %s",
2509                    MigrationStatus_str(s->state));
2510         return;
2511     }
2512     qemu_sem_post(&s->pause_sem);
2513 }
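
/*
 * Flow sketch (editorial example; the helper name is hypothetical):
 * with pause-before-switchover enabled, the source parks in
 * PRE_SWITCHOVER and waits on pause_sem until a client acknowledges
 * the switchover like this.
 */
static inline void example_ack_switchover(Error **errp)
{
    qmp_migrate_continue(MIGRATION_STATUS_PRE_SWITCHOVER, errp);
}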
2514 
2515 bool migrate_release_ram(void)
2516 {
2517     MigrationState *s;
2518 
2519     s = migrate_get_current();
2520 
2521     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
2522 }
2523 
2524 bool migrate_postcopy_ram(void)
2525 {
2526     MigrationState *s;
2527 
2528     s = migrate_get_current();
2529 
2530     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
2531 }
2532 
2533 bool migrate_postcopy(void)
2534 {
2535     return migrate_postcopy_ram() || migrate_dirty_bitmaps();
2536 }
2537 
2538 bool migrate_auto_converge(void)
2539 {
2540     MigrationState *s;
2541 
2542     s = migrate_get_current();
2543 
2544     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
2545 }
2546 
2547 bool migrate_zero_blocks(void)
2548 {
2549     MigrationState *s;
2550 
2551     s = migrate_get_current();
2552 
2553     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
2554 }
2555 
2556 bool migrate_postcopy_blocktime(void)
2557 {
2558     MigrationState *s;
2559 
2560     s = migrate_get_current();
2561 
2562     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
2563 }
2564 
2565 bool migrate_use_compression(void)
2566 {
2567     MigrationState *s;
2568 
2569     s = migrate_get_current();
2570 
2571     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
2572 }
2573 
2574 int migrate_compress_level(void)
2575 {
2576     MigrationState *s;
2577 
2578     s = migrate_get_current();
2579 
2580     return s->parameters.compress_level;
2581 }
2582 
2583 int migrate_compress_threads(void)
2584 {
2585     MigrationState *s;
2586 
2587     s = migrate_get_current();
2588 
2589     return s->parameters.compress_threads;
2590 }
2591 
2592 int migrate_compress_wait_thread(void)
2593 {
2594     MigrationState *s;
2595 
2596     s = migrate_get_current();
2597 
2598     return s->parameters.compress_wait_thread;
2599 }
2600 
2601 int migrate_decompress_threads(void)
2602 {
2603     MigrationState *s;
2604 
2605     s = migrate_get_current();
2606 
2607     return s->parameters.decompress_threads;
2608 }
2609 
2610 bool migrate_dirty_bitmaps(void)
2611 {
2612     MigrationState *s;
2613 
2614     s = migrate_get_current();
2615 
2616     return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
2617 }
2618 
2619 bool migrate_ignore_shared(void)
2620 {
2621     MigrationState *s;
2622 
2623     s = migrate_get_current();
2624 
2625     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
2626 }
2627 
2628 bool migrate_validate_uuid(void)
2629 {
2630     MigrationState *s;
2631 
2632     s = migrate_get_current();
2633 
2634     return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
2635 }
2636 
2637 bool migrate_use_events(void)
2638 {
2639     MigrationState *s;
2640 
2641     s = migrate_get_current();
2642 
2643     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
2644 }
2645 
2646 bool migrate_use_multifd(void)
2647 {
2648     MigrationState *s;
2649 
2650     s = migrate_get_current();
2651 
2652     return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
2653 }
2654 
2655 bool migrate_pause_before_switchover(void)
2656 {
2657     MigrationState *s;
2658 
2659     s = migrate_get_current();
2660 
2661     return s->enabled_capabilities[
2662         MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
2663 }
2664 
2665 int migrate_multifd_channels(void)
2666 {
2667     MigrationState *s;
2668 
2669     s = migrate_get_current();
2670 
2671     return s->parameters.multifd_channels;
2672 }
2673 
2674 MultiFDCompression migrate_multifd_compression(void)
2675 {
2676     MigrationState *s;
2677 
2678     s = migrate_get_current();
2679 
2680     assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
2681     return s->parameters.multifd_compression;
2682 }
2683 
2684 int migrate_multifd_zlib_level(void)
2685 {
2686     MigrationState *s;
2687 
2688     s = migrate_get_current();
2689 
2690     return s->parameters.multifd_zlib_level;
2691 }
2692 
2693 int migrate_multifd_zstd_level(void)
2694 {
2695     MigrationState *s;
2696 
2697     s = migrate_get_current();
2698 
2699     return s->parameters.multifd_zstd_level;
2700 }
2701 
2702 #ifdef CONFIG_LINUX
2703 bool migrate_use_zero_copy_send(void)
2704 {
2705     MigrationState *s;
2706 
2707     s = migrate_get_current();
2708 
2709     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
2710 }
2711 #endif
2712 
2713 int migrate_use_tls(void)
2714 {
2715     MigrationState *s;
2716 
2717     s = migrate_get_current();
2718 
2719     return s->parameters.tls_creds && *s->parameters.tls_creds;
2720 }
2721 
2722 int migrate_use_xbzrle(void)
2723 {
2724     MigrationState *s;
2725 
2726     s = migrate_get_current();
2727 
2728     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
2729 }
2730 
2731 uint64_t migrate_xbzrle_cache_size(void)
2732 {
2733     MigrationState *s;
2734 
2735     s = migrate_get_current();
2736 
2737     return s->parameters.xbzrle_cache_size;
2738 }
2739 
2740 static int64_t migrate_max_postcopy_bandwidth(void)
2741 {
2742     MigrationState *s;
2743 
2744     s = migrate_get_current();
2745 
2746     return s->parameters.max_postcopy_bandwidth;
2747 }
2748 
2749 bool migrate_use_block(void)
2750 {
2751     MigrationState *s;
2752 
2753     s = migrate_get_current();
2754 
2755     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
2756 }
2757 
2758 bool migrate_use_return_path(void)
2759 {
2760     MigrationState *s;
2761 
2762     s = migrate_get_current();
2763 
2764     return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
2765 }
2766 
2767 bool migrate_use_block_incremental(void)
2768 {
2769     MigrationState *s;
2770 
2771     s = migrate_get_current();
2772 
2773     return s->parameters.block_incremental;
2774 }
2775 
2776 bool migrate_background_snapshot(void)
2777 {
2778     MigrationState *s;
2779 
2780     s = migrate_get_current();
2781 
2782     return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
2783 }
2784 
2785 bool migrate_postcopy_preempt(void)
2786 {
2787     MigrationState *s;
2788 
2789     s = migrate_get_current();
2790 
2791     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
2792 }
2793 
2794 /* migration thread support */
2795 /*
2796  * Something bad happened to the RP stream; mark an error.
2797  * The caller shall print or trace something to indicate why.
2798  */
2799 static void mark_source_rp_bad(MigrationState *s)
2800 {
2801     s->rp_state.error = true;
2802 }
2803 
2804 static struct rp_cmd_args {
2805     ssize_t     len; /* -1 = variable */
2806     const char *name;
2807 } rp_cmd_args[] = {
2808     [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
2809     [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
2810     [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
2811     [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
2812     [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
2813     [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
2814     [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
2815     [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
2816 };
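
/*
 * Wire-format sketch (editorial example; the helper name is
 * hypothetical): each return-path message is framed as two big-endian
 * 16-bit header fields (message type, payload length) followed by the
 * payload, matching the qemu_get_be16() reads in the thread below;
 * e.g. a PONG carries a 4-byte big-endian value.
 */
static inline void example_frame_rp_pong(uint8_t out[8], uint32_t value)
{
    stw_be_p(out + 0, MIG_RP_MSG_PONG); /* header: message type */
    stw_be_p(out + 2, 4);               /* header: payload length */
    stl_be_p(out + 4, value);           /* payload */
}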
2817 
2818 /*
2819  * Process a request for pages received on the return path.
2820  * We're allowed to send more than requested (e.g. to round to our page size)
2821  * and we don't need to send pages that have already been sent.
2822  */
2823 static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname,
2824                                         ram_addr_t start, size_t len)
2825 {
2826     long our_host_ps = qemu_real_host_page_size();
2827 
2828     trace_migrate_handle_rp_req_pages(rbname, start, len);
2829 
2830     /*
2831      * Since we currently insist on matching page sizes, just sanity check
2832      * we're being asked for whole host pages.
2833      */
2834     if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
2835         !QEMU_IS_ALIGNED(len, our_host_ps)) {
2836         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
2837                      " len: %zd", __func__, start, len);
2838         mark_source_rp_bad(ms);
2839         return;
2840     }
2841 
2842     if (ram_save_queue_pages(rbname, start, len)) {
2843         mark_source_rp_bad(ms);
2844     }
2845 }
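
/*
 * Alignment sketch (editorial example; the helper name is
 * hypothetical): with a 4 KiB host page size, 0x1000/0x2000 would
 * pass the check above while 0x1800/0x800 would be rejected as
 * misaligned.
 */
static inline bool example_is_whole_host_pages(ram_addr_t start, size_t len)
{
    long ps = qemu_real_host_page_size();   /* e.g. 4096 */

    return QEMU_IS_ALIGNED(start, ps) && QEMU_IS_ALIGNED(len, ps);
}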
2846 
2847 /* Return true to retry, false to quit */
2848 static bool postcopy_pause_return_path_thread(MigrationState *s)
2849 {
2850     trace_postcopy_pause_return_path();
2851 
2852     qemu_sem_wait(&s->postcopy_pause_rp_sem);
2853 
2854     trace_postcopy_pause_return_path_continued();
2855 
2856     return true;
2857 }
2858 
2859 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
2860 {
2861     RAMBlock *block = qemu_ram_block_by_name(block_name);
2862 
2863     if (!block) {
2864         error_report("%s: invalid block name '%s'", __func__, block_name);
2865         return -EINVAL;
2866     }
2867 
2868     /* Fetch the received bitmap and refresh the dirty bitmap */
2869     return ram_dirty_bitmap_reload(s, block);
2870 }
2871 
2872 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
2873 {
2874     trace_source_return_path_thread_resume_ack(value);
2875 
2876     if (value != MIGRATION_RESUME_ACK_VALUE) {
2877         error_report("%s: illegal resume_ack value %"PRIu32,
2878                      __func__, value);
2879         return -1;
2880     }
2881 
2882     /* Now both sides are active. */
2883     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2884                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2885 
2886     /* Notify send thread that time to continue send pages */
2887     qemu_sem_post(&s->rp_state.rp_sem);
2888 
2889     return 0;
2890 }
2891 
2892 /*
2893  * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if
2894  * it exists) in a safe way.
2895  */
2896 static void migration_release_dst_files(MigrationState *ms)
2897 {
2898     QEMUFile *file;
2899 
2900     WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
2901         /*
2902          * Reset the from_dst_file pointer first before releasing it, as we
2903          * can't block within the lock section.
2904          */
2905         file = ms->rp_state.from_dst_file;
2906         ms->rp_state.from_dst_file = NULL;
2907     }
2908 
2909     /*
2910      * Do the same for the postcopy fast path socket, if there is one.
2911      * No locking is needed because this qemufile should only be managed
2912      * by the return path thread.
2913      */
2914     if (ms->postcopy_qemufile_src) {
2915         migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
2916         qemu_file_shutdown(ms->postcopy_qemufile_src);
2917         qemu_fclose(ms->postcopy_qemufile_src);
2918         ms->postcopy_qemufile_src = NULL;
2919     }
2920 
2921     qemu_fclose(file);
2922 }
2923 
2924 /*
2925  * Handles messages sent on the return path towards the source VM.
2927  */
2928 static void *source_return_path_thread(void *opaque)
2929 {
2930     MigrationState *ms = opaque;
2931     QEMUFile *rp = ms->rp_state.from_dst_file;
2932     uint16_t header_len, header_type;
2933     uint8_t buf[512];
2934     uint32_t tmp32, sibling_error;
2935     ram_addr_t start = 0; /* =0 to silence warning */
2936     size_t len = 0, expected_len;
2937     int res;
2938 
2939     trace_source_return_path_thread_entry();
2940     rcu_register_thread();
2941 
2942 retry:
2943     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
2944            migration_is_setup_or_active(ms->state)) {
2945         trace_source_return_path_thread_loop_top();
2946         header_type = qemu_get_be16(rp);
2947         header_len = qemu_get_be16(rp);
2948 
2949         if (qemu_file_get_error(rp)) {
2950             mark_source_rp_bad(ms);
2951             goto out;
2952         }
2953 
2954         if (header_type >= MIG_RP_MSG_MAX ||
2955             header_type == MIG_RP_MSG_INVALID) {
2956             error_report("RP: Received invalid message 0x%04x length 0x%04x",
2957                          header_type, header_len);
2958             mark_source_rp_bad(ms);
2959             goto out;
2960         }
2961 
2962         if ((rp_cmd_args[header_type].len != -1 &&
2963             header_len != rp_cmd_args[header_type].len) ||
2964             header_len > sizeof(buf)) {
2965             error_report("RP: Received '%s' message (0x%04x) with"
2966                          " incorrect length %d expecting %zu",
2967                          rp_cmd_args[header_type].name, header_type, header_len,
2968                          (size_t)rp_cmd_args[header_type].len);
2969             mark_source_rp_bad(ms);
2970             goto out;
2971         }
2972 
2973         /* We know we've got a valid header by this point */
2974         res = qemu_get_buffer(rp, buf, header_len);
2975         if (res != header_len) {
2976             error_report("RP: Failed reading data for message 0x%04x"
2977                          " read %d expected %d",
2978                          header_type, res, header_len);
2979             mark_source_rp_bad(ms);
2980             goto out;
2981         }
2982 
2983         /* OK, we have the message and the data */
2984         switch (header_type) {
2985         case MIG_RP_MSG_SHUT:
2986             sibling_error = ldl_be_p(buf);
2987             trace_source_return_path_thread_shut(sibling_error);
2988             if (sibling_error) {
2989                 error_report("RP: Sibling indicated error %d", sibling_error);
2990                 mark_source_rp_bad(ms);
2991             }
2992             /*
2993              * We'll let the main thread deal with closing the RP;
2994              * we could do a shutdown(2) on it, but we're the only user
2995              * anyway, so there's nothing gained.
2996              */
2997             goto out;
2998 
2999         case MIG_RP_MSG_PONG:
3000             tmp32 = ldl_be_p(buf);
3001             trace_source_return_path_thread_pong(tmp32);
3002             break;
3003 
3004         case MIG_RP_MSG_REQ_PAGES:
3005             start = ldq_be_p(buf);
3006             len = ldl_be_p(buf + 8);
3007             migrate_handle_rp_req_pages(ms, NULL, start, len);
3008             break;
3009 
3010         case MIG_RP_MSG_REQ_PAGES_ID:
3011             expected_len = 12 + 1; /* header + termination */
3012 
3013             if (header_len >= expected_len) {
3014                 start = ldq_be_p(buf);
3015                 len = ldl_be_p(buf + 8);
3016                 /* Now we expect an idstr */
3017                 tmp32 = buf[12]; /* Length of the following idstr */
3018                 buf[13 + tmp32] = '\0';
3019                 expected_len += tmp32;
3020             }
3021             if (header_len != expected_len) {
3022                 error_report("RP: Req_Page_id with length %d expecting %zd",
3023                              header_len, expected_len);
3024                 mark_source_rp_bad(ms);
3025                 goto out;
3026             }
3027             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
3028             break;
3029 
3030         case MIG_RP_MSG_RECV_BITMAP:
3031             if (header_len < 1) {
3032                 error_report("%s: missing block name", __func__);
3033                 mark_source_rp_bad(ms);
3034                 goto out;
3035             }
3036             /* Format: len (1B) + idstr (<255B). This ends the idstr. */
3037             buf[buf[0] + 1] = '\0';
3038             if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
3039                 mark_source_rp_bad(ms);
3040                 goto out;
3041             }
3042             break;
3043 
3044         case MIG_RP_MSG_RESUME_ACK:
3045             tmp32 = ldl_be_p(buf);
3046             if (migrate_handle_rp_resume_ack(ms, tmp32)) {
3047                 mark_source_rp_bad(ms);
3048                 goto out;
3049             }
3050             break;
3051 
3052         default:
3053             break;
3054         }
3055     }
3056 
3057 out:
3058     res = qemu_file_get_error(rp);
3059     if (res) {
3060         if (migration_in_postcopy()) {
3061             /*
3062              * Maybe there is something we can do: it looks like a
3063              * network down issue, and we pause for a recovery.
3064              */
3065             migration_release_dst_files(ms);
3066             rp = NULL;
3067             if (postcopy_pause_return_path_thread(ms)) {
3068                 /*
3069                  * Reload rp, reset the rest.  Referencing it is safe since
3070                  * it's reset only by us above, or when migration completes
3071                  */
3072                 rp = ms->rp_state.from_dst_file;
3073                 ms->rp_state.error = false;
3074                 goto retry;
3075             }
3076         }
3077 
3078         trace_source_return_path_thread_bad_end();
3079         mark_source_rp_bad(ms);
3080     }
3081 
3082     trace_source_return_path_thread_end();
3083     migration_release_dst_files(ms);
3084     rcu_unregister_thread();
3085     return NULL;
3086 }
3087 
3088 static int open_return_path_on_source(MigrationState *ms,
3089                                       bool create_thread)
3090 {
3091     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
3092     if (!ms->rp_state.from_dst_file) {
3093         return -1;
3094     }
3095 
3096     trace_open_return_path_on_source();
3097 
3098     if (!create_thread) {
3099         /* We're done */
3100         return 0;
3101     }
3102 
3103     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
3104                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
3105     ms->rp_state.rp_thread_created = true;
3106 
3107     trace_open_return_path_on_source_continue();
3108 
3109     return 0;
3110 }
3111 
3112 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
3113 static int await_return_path_close_on_source(MigrationState *ms)
3114 {
3115     /*
3116      * If this is a normal exit then the destination will send a SHUT and the
3117      * rp_thread will exit; however, if there's an error we need to cause
3118      * it to exit.
3119      */
3120     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
3121         /*
3122          * shutdown(2), if we have it, will cause it to unblock if it's stuck
3123          * waiting for the destination.
3124          */
3125         qemu_file_shutdown(ms->rp_state.from_dst_file);
3126         mark_source_rp_bad(ms);
3127     }
3128     trace_await_return_path_close_on_source_joining();
3129     qemu_thread_join(&ms->rp_state.rp_thread);
3130     ms->rp_state.rp_thread_created = false;
3131     trace_await_return_path_close_on_source_close();
3132     return ms->rp_state.error;
3133 }
3134 
3135 /*
3136  * Switch from normal iteration to postcopy
3137  * Returns non-0 on error
3138  */
3139 static int postcopy_start(MigrationState *ms)
3140 {
3141     int ret;
3142     QIOChannelBuffer *bioc;
3143     QEMUFile *fb;
3144     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3145     int64_t bandwidth = migrate_max_postcopy_bandwidth();
3146     bool restart_block = false;
3147     int cur_state = MIGRATION_STATUS_ACTIVE;
3148 
3149     if (postcopy_preempt_wait_channel(ms)) {
3150         migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
3151         return -1;
3152     }
3153 
3154     if (!migrate_pause_before_switchover()) {
3155         migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
3156                           MIGRATION_STATUS_POSTCOPY_ACTIVE);
3157     }
3158 
3159     trace_postcopy_start();
3160     qemu_mutex_lock_iothread();
3161     trace_postcopy_start_set_run();
3162 
3163     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
3164     global_state_store();
3165     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
3166     if (ret < 0) {
3167         goto fail;
3168     }
3169 
3170     ret = migration_maybe_pause(ms, &cur_state,
3171                                 MIGRATION_STATUS_POSTCOPY_ACTIVE);
3172     if (ret < 0) {
3173         goto fail;
3174     }
3175 
3176     ret = bdrv_inactivate_all();
3177     if (ret < 0) {
3178         goto fail;
3179     }
3180     restart_block = true;
3181 
3182     /*
3183      * Cause any non-postcopiable but iterative devices to
3184      * send out their final data.
3185      */
3186     qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
3187 
3188     /*
3189      * In the Finish migrate phase, with the io-lock held, everything
3190      * should be quiet, but we've potentially still got dirty pages and
3191      * we need to tell the destination to throw away any pages it's
3192      * already received that are dirty.
3193      */
3194     if (migrate_postcopy_ram()) {
3195         ram_postcopy_send_discard_bitmap(ms);
3196     }
3197 
3198     /*
3199      * Send the rest of the state; note that devices doing postcopy
3200      * will notice we're in POSTCOPY_ACTIVE and not actually
3201      * wrap their state up here.
3202      */
3203     /* A max-postcopy-bandwidth of 0 means unlimited */
3204     if (!bandwidth) {
3205         qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
3206     } else {
3207         qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
3208     }
3209     if (migrate_postcopy_ram()) {
3210         /* Ping just for debugging, helps line traces up */
3211         qemu_savevm_send_ping(ms->to_dst_file, 2);
3212     }
3213 
3214     /*
3215      * While loading the device state we may trigger page transfer
3216      * requests and the fd must be free to process those, and thus
3217      * the destination must read the whole device state off the fd before
3218      * it starts processing it.  Unfortunately the ad-hoc migration format
3219      * doesn't allow the destination to know the size to read without fully
3220      * parsing it through each devices load-state code (especially the open
3221      * coded devices that use get/put).
3222      * So we wrap the device state up in a package with a length at the start;
3223      * to do this we use a qemu_buf to hold the whole of the device state.
3224      */
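    /*
     * Layout sketch (editorial, approximate): the buffered device
     * state is later sent as one MIG_CMD_PACKAGED command carrying a
     * 32-bit length followed by that many bytes, so the destination
     * knows exactly how much to read off the stream before loading it.
     */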
3225     bioc = qio_channel_buffer_new(4096);
3226     qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
3227     fb = qemu_file_new_output(QIO_CHANNEL(bioc));
3228     object_unref(OBJECT(bioc));
3229 
3230     /*
3231      * Make sure the receiver can get incoming pages before we send the rest
3232      * of the state
3233      */
3234     qemu_savevm_send_postcopy_listen(fb);
3235 
3236     qemu_savevm_state_complete_precopy(fb, false, false);
3237     if (migrate_postcopy_ram()) {
3238         qemu_savevm_send_ping(fb, 3);
3239     }
3240 
3241     qemu_savevm_send_postcopy_run(fb);
3242 
3243     /* <><> end of the data going into the package */
3244 
3245     /* Last point of recovery; as soon as we send the package the destination
3246      * can open devices and potentially start running.
3247      * Lets just check again we've not got any errors.
3248      */
3249     ret = qemu_file_get_error(ms->to_dst_file);
3250     if (ret) {
3251         error_report("postcopy_start: Migration stream errored (pre package)");
3252         goto fail_closefb;
3253     }
3254 
3255     restart_block = false;
3256 
3257     /* Now send that blob */
3258     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
3259         goto fail_closefb;
3260     }
3261     qemu_fclose(fb);
3262 
3263     /* Send a notification to give anything that needs to happen at the
3264      * transition to postcopy, after the device state, a chance to run;
3265      * in particular, spice needs to trigger a transition now.
3266      */
3267     ms->postcopy_after_devices = true;
3268     notifier_list_notify(&migration_state_notifiers, ms);
3269 
3270     ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
3271 
3272     qemu_mutex_unlock_iothread();
3273 
3274     if (migrate_postcopy_ram()) {
3275         /*
3276          * Although this ping is just for debug, it could potentially be
3277          * used for getting a better measurement of downtime at the source.
3278          */
3279         qemu_savevm_send_ping(ms->to_dst_file, 4);
3280     }
3281 
3282     if (migrate_release_ram()) {
3283         ram_postcopy_migrated_memory_release(ms);
3284     }
3285 
3286     ret = qemu_file_get_error(ms->to_dst_file);
3287     if (ret) {
3288         error_report("postcopy_start: Migration stream errored");
3289         migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
3290                           MIGRATION_STATUS_FAILED);
3291     }
3292 
3293     trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
3294 
3295     return ret;
3296 
3297 fail_closefb:
3298     qemu_fclose(fb);
3299 fail:
3300     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
3301                       MIGRATION_STATUS_FAILED);
3302     if (restart_block) {
3303         /* A failure happened early enough that we know the destination hasn't
3304          * accessed block devices, so we're safe to recover.
3305          */
3306         Error *local_err = NULL;
3307 
3308         bdrv_activate_all(&local_err);
3309         if (local_err) {
3310             error_report_err(local_err);
3311         }
3312     }
3313     qemu_mutex_unlock_iothread();
3314     return -1;
3315 }
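/*
 * Annotation: the "package" pattern above, reduced to its essentials.  A
 * minimal sketch (the helper name is hypothetical, and the block is not
 * compiled), showing how the device state is buffered in a QIOChannelBuffer
 * so it can be sent as one length-prefixed blob that the destination can
 * read in full before parsing:
 */
#if 0
static int send_packaged_state_sketch(MigrationState *ms)
{
    QIOChannelBuffer *bioc = qio_channel_buffer_new(4096);
    QEMUFile *fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    int ret;

    object_unref(OBJECT(bioc));   /* fb keeps the channel alive */
    qemu_savevm_state_complete_precopy(fb, false, false);
    /* bioc->usage is the accumulated length, sent ahead of the blob */
    ret = qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage);
    qemu_fclose(fb);
    return ret;
}
#endif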
3316 
3317 /**
3318  * migration_maybe_pause: Pause if required to by
3319  * migrate_pause_before_switchover; called with the iothread locked.
3320  * Returns: 0 on success, -EINVAL if the state changed unexpectedly
3321  */
3322 static int migration_maybe_pause(MigrationState *s,
3323                                  int *current_active_state,
3324                                  int new_state)
3325 {
3326     if (!migrate_pause_before_switchover()) {
3327         return 0;
3328     }
3329 
3330     /* Since leaving this state is not atomic with posting the semaphore
3331      * it's possible that someone could have issued multiple migrate_continue
3332      * and the semaphore is incorrectly positive at this point;
3333      * the docs say it's undefined to reinit a semaphore that's already
3334      * init'd, so use timedwait to eat up any existing posts.
3335      */
3336     while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
3337         /* This block intentionally left blank */
3338     }
3339 
3340     /*
3341      * If the migration is cancelled when it is in the completion phase,
3342      * the migration state is set to MIGRATION_STATUS_CANCELLING.
3343      * In that case we must not wait on the semaphore; otherwise we
3344      * would block forever on the 'pause_sem' semaphore.
3345      */
3346     if (s->state != MIGRATION_STATUS_CANCELLING) {
3347         qemu_mutex_unlock_iothread();
3348         migrate_set_state(&s->state, *current_active_state,
3349                           MIGRATION_STATUS_PRE_SWITCHOVER);
3350         qemu_sem_wait(&s->pause_sem);
3351         migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
3352                           new_state);
3353         *current_active_state = new_state;
3354         qemu_mutex_lock_iothread();
3355     }
3356 
3357     return s->state == new_state ? 0 : -EINVAL;
3358 }
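/*
 * Annotation: the timedwait loop above is a generic "drain stale posts"
 * pattern for a QemuSemaphore; a minimal sketch with a hypothetical helper
 * name (not compiled):
 */
#if 0
static void drain_stale_posts_sketch(QemuSemaphore *sem)
{
    /* A 1ms timedwait succeeds (returns 0) once per leftover post and
     * times out as soon as the count reaches zero. */
    while (qemu_sem_timedwait(sem, 1) == 0) {
        /* intentionally empty */
    }
}
#endif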
3359 
3360 /**
3361  * migration_completion: Used by migration_thread when there's not much left.
3362  *   The caller 'breaks' the loop when this returns.
3363  *
3364  * @s: Current migration state
3365  */
3366 static void migration_completion(MigrationState *s)
3367 {
3368     int ret;
3369     int current_active_state = s->state;
3370 
3371     if (s->state == MIGRATION_STATUS_ACTIVE) {
3372         qemu_mutex_lock_iothread();
3373         s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3374         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
3375         s->vm_was_running = runstate_is_running();
3376         ret = global_state_store();
3377 
3378         if (!ret) {
3379             bool inactivate = !migrate_colo_enabled();
3380             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
3381             trace_migration_completion_vm_stop(ret);
3382             if (ret >= 0) {
3383                 ret = migration_maybe_pause(s, &current_active_state,
3384                                             MIGRATION_STATUS_DEVICE);
3385             }
3386             if (ret >= 0) {
3387                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
3388                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
3389                                                          inactivate);
3390             }
3391             if (inactivate && ret >= 0) {
3392                 s->block_inactive = true;
3393             }
3394         }
3395         qemu_mutex_unlock_iothread();
3396 
3397         if (ret < 0) {
3398             goto fail;
3399         }
3400     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3401         trace_migration_completion_postcopy_end();
3402 
3403         qemu_mutex_lock_iothread();
3404         qemu_savevm_state_complete_postcopy(s->to_dst_file);
3405         qemu_mutex_unlock_iothread();
3406 
3407         /* Shut down the postcopy fast path thread */
3408         if (migrate_postcopy_preempt()) {
3409             postcopy_preempt_shutdown_file(s);
3410         }
3411 
3412         trace_migration_completion_postcopy_end_after_complete();
3413     } else {
3414         goto fail;
3415     }
3416 
3417     /*
3418      * If rp was opened we must clean up the thread before
3419      * cleaning everything else up (since if there are no failures
3420      * it will wait for the destination to send its status in
3421      * a SHUT command).
3422      */
3423     if (s->rp_state.rp_thread_created) {
3424         int rp_error;
3425         trace_migration_return_path_end_before();
3426         rp_error = await_return_path_close_on_source(s);
3427         trace_migration_return_path_end_after(rp_error);
3428         if (rp_error) {
3429             goto fail_invalidate;
3430         }
3431     }
3432 
3433     if (qemu_file_get_error(s->to_dst_file)) {
3434         trace_migration_completion_file_err();
3435         goto fail_invalidate;
3436     }
3437 
3438     if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
3439         /* COLO does not support postcopy */
3440         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3441                           MIGRATION_STATUS_COLO);
3442     } else {
3443         migrate_set_state(&s->state, current_active_state,
3444                           MIGRATION_STATUS_COMPLETED);
3445     }
3446 
3447     return;
3448 
3449 fail_invalidate:
3450     /* If not doing postcopy, vm_start() will be called: let's regain
3451      * control of the images.
3452      */
3453     if (s->state == MIGRATION_STATUS_ACTIVE ||
3454         s->state == MIGRATION_STATUS_DEVICE) {
3455         Error *local_err = NULL;
3456 
3457         qemu_mutex_lock_iothread();
3458         bdrv_activate_all(&local_err);
3459         if (local_err) {
3460             error_report_err(local_err);
3461         } else {
3462             s->block_inactive = false;
3463         }
3464         qemu_mutex_unlock_iothread();
3465     }
3466 
3467 fail:
3468     migrate_set_state(&s->state, current_active_state,
3469                       MIGRATION_STATUS_FAILED);
3470 }
3471 
3472 /**
3473  * bg_migration_completion: Used by bg_migration_thread after all the
3474  *   RAM has been saved. The caller 'breaks' the loop when this returns.
3475  *
3476  * @s: Current migration state
3477  */
3478 static void bg_migration_completion(MigrationState *s)
3479 {
3480     int current_active_state = s->state;
3481 
3482     /*
3483      * Stop tracking RAM writes - un-protect memory, un-register UFFD
3484      * memory ranges, flush kernel wait queues and wake up threads
3485      * waiting for write fault to be resolved.
3486      */
3487     ram_write_tracking_stop();
3488 
3489     if (s->state == MIGRATION_STATUS_ACTIVE) {
3490         /*
3491          * By this moment we have RAM content saved into the migration stream.
3492          * The next step is to flush the non-RAM content (device state)
3493      * right after the RAM content. The device state has been stored into
3494          * the temporary buffer before RAM saving started.
3495          */
3496         qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
3497         qemu_fflush(s->to_dst_file);
3498     } else if (s->state == MIGRATION_STATUS_CANCELLING) {
3499         goto fail;
3500     }
3501 
3502     if (qemu_file_get_error(s->to_dst_file)) {
3503         trace_migration_completion_file_err();
3504         goto fail;
3505     }
3506 
3507     migrate_set_state(&s->state, current_active_state,
3508                       MIGRATION_STATUS_COMPLETED);
3509     return;
3510 
3511 fail:
3512     migrate_set_state(&s->state, current_active_state,
3513                       MIGRATION_STATUS_FAILED);
3514 }
3515 
3516 bool migrate_colo_enabled(void)
3517 {
3518     MigrationState *s = migrate_get_current();
3519     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
3520 }
3521 
3522 typedef enum MigThrError {
3523     /* No error detected */
3524     MIG_THR_ERR_NONE = 0,
3525     /* Detected error, but resumed successfully */
3526     MIG_THR_ERR_RECOVERED = 1,
3527     /* Detected fatal error, need to exit */
3528     MIG_THR_ERR_FATAL = 2,
3529 } MigThrError;
3530 
3531 static int postcopy_resume_handshake(MigrationState *s)
3532 {
3533     qemu_savevm_send_postcopy_resume(s->to_dst_file);
3534 
3535     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3536         qemu_sem_wait(&s->rp_state.rp_sem);
3537     }
3538 
3539     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3540         return 0;
3541     }
3542 
3543     return -1;
3544 }
3545 
3546 /* Return zero if success, or <0 for error */
3547 static int postcopy_do_resume(MigrationState *s)
3548 {
3549     int ret;
3550 
3551     /*
3552      * Call all the resume_prepare() hooks, so that modules can be
3553      * ready for the migration resume.
3554      */
3555     ret = qemu_savevm_state_resume_prepare(s);
3556     if (ret) {
3557         error_report("%s: resume_prepare() failure detected: %d",
3558                      __func__, ret);
3559         return ret;
3560     }
3561 
3562     /*
3563      * Last handshake with destination on the resume (destination will
3564      * switch to postcopy-active afterwards)
3565      */
3566     ret = postcopy_resume_handshake(s);
3567     if (ret) {
3568         error_report("%s: handshake failed: %d", __func__, ret);
3569         return ret;
3570     }
3571 
3572     return 0;
3573 }
3574 
3575 /*
3576  * We don't return until we are in a safe state to continue the current
3577  * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
3578  * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
3579  */
3580 static MigThrError postcopy_pause(MigrationState *s)
3581 {
3582     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
3583 
3584     while (true) {
3585         QEMUFile *file;
3586 
3587         /*
3588          * Current channel is possibly broken. Release it.  Note that this is
3589          * guaranteed even without the lock because to_dst_file should only be
3590          * modified by the migration thread.  That also guarantees that the
3591          * unregister of yank is safe too without the lock.  It should be safe
3592          * even to be within the qemu_file_lock, but we didn't do that to avoid
3593          * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
3594          * the qemu_file_lock critical section as small as possible.
3595          */
3596         assert(s->to_dst_file);
3597         migration_ioc_unregister_yank_from_file(s->to_dst_file);
3598         qemu_mutex_lock(&s->qemu_file_lock);
3599         file = s->to_dst_file;
3600         s->to_dst_file = NULL;
3601         qemu_mutex_unlock(&s->qemu_file_lock);
3602 
3603         qemu_file_shutdown(file);
3604         qemu_fclose(file);
3605 
3606         migrate_set_state(&s->state, s->state,
3607                           MIGRATION_STATUS_POSTCOPY_PAUSED);
3608 
3609         error_report("Detected IO failure for postcopy. "
3610                      "Migration paused.");
3611 
3612         /*
3613          * We wait until things are fixed up. Then someone will set the
3614          * status back for us.
3615          */
3616         while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
3617             qemu_sem_wait(&s->postcopy_pause_sem);
3618         }
3619 
3620         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3621             /* Woken up by a recovery procedure. Give it a shot */
3622 
3623             if (postcopy_preempt_wait_channel(s)) {
3624                 /*
3625                  * Preempt enabled, and new channel creation failed; loop
3626                  * back to wait for another recovery.
3627                  */
3628                 continue;
3629             }
3630 
3631             /*
3632              * Firstly, let's wake up the return path now, with a new
3633              * return path channel.
3634              */
3635             qemu_sem_post(&s->postcopy_pause_rp_sem);
3636 
3637             /* Do the resume logic */
3638             if (postcopy_do_resume(s) == 0) {
3639                 /* Let's continue! */
3640                 trace_postcopy_pause_continued();
3641                 return MIG_THR_ERR_RECOVERED;
3642             } else {
3643                 /*
3644                  * Something went wrong during the recovery; let's
3645                  * pause again. Pausing is always better than throwing
3646                  * data away.
3647                  */
3648                 continue;
3649             }
3650         } else {
3651             /* This is not right... Time to quit. */
3652             return MIG_THR_ERR_FATAL;
3653         }
3654     }
3655 }
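/*
 * Annotation: the channel teardown above follows a "swap under lock, close
 * outside the lock" pattern that keeps the qemu_file_lock critical section
 * minimal.  A hedged sketch (hypothetical helper name, not compiled):
 */
#if 0
static void detach_and_close_sketch(MigrationState *s)
{
    QEMUFile *file;

    /* Only the pointer swap needs the lock... */
    qemu_mutex_lock(&s->qemu_file_lock);
    file = s->to_dst_file;
    s->to_dst_file = NULL;
    qemu_mutex_unlock(&s->qemu_file_lock);

    /* ...while the potentially blocking shutdown/close happen outside it */
    qemu_file_shutdown(file);
    qemu_fclose(file);
}
#endif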
3656 
3657 static MigThrError migration_detect_error(MigrationState *s)
3658 {
3659     int ret;
3660     int state = s->state;
3661     Error *local_error = NULL;
3662 
3663     if (state == MIGRATION_STATUS_CANCELLING ||
3664         state == MIGRATION_STATUS_CANCELLED) {
3665         /* End the migration, but don't set the state to failed */
3666         return MIG_THR_ERR_FATAL;
3667     }
3668 
3669     /*
3670      * Try to detect any file errors.  Note that postcopy_qemufile_src will
3671      * be NULL when postcopy preempt is not enabled.
3672      */
3673     ret = qemu_file_get_error_obj_any(s->to_dst_file,
3674                                       s->postcopy_qemufile_src,
3675                                       &local_error);
3676     if (!ret) {
3677         /* Everything is fine */
3678         assert(!local_error);
3679         return MIG_THR_ERR_NONE;
3680     }
3681 
3682     if (local_error) {
3683         migrate_set_error(s, local_error);
3684         error_free(local_error);
3685     }
3686 
3687     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
3688         /*
3689          * For postcopy, we allow the network to be down for a
3690          * while. After that, it can be continued by a
3691          * recovery phase.
3692          */
3693         return postcopy_pause(s);
3694     } else {
3695         /*
3696          * For precopy (or postcopy with an error outside IO), we fail
3697          * immediately.
3698          */
3699         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
3700         trace_migration_thread_file_err();
3701 
3702         /* Time to stop the migration, now. */
3703         return MIG_THR_ERR_FATAL;
3704     }
3705 }
3706 
3707 /* How many bytes have we transferred since the beginning of the migration */
3708 static uint64_t migration_total_bytes(MigrationState *s)
3709 {
3710     return qemu_file_total_transferred(s->to_dst_file) +
3711         ram_counters.multifd_bytes;
3712 }
3713 
3714 static void migration_calculate_complete(MigrationState *s)
3715 {
3716     uint64_t bytes = migration_total_bytes(s);
3717     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3718     int64_t transfer_time;
3719 
3720     s->total_time = end_time - s->start_time;
3721     if (!s->downtime) {
3722         /*
3723          * It's still not set, so this is a precopy migration.  For
3724          * postcopy, downtime is calculated during postcopy_start().
3725          */
3726         s->downtime = end_time - s->downtime_start;
3727     }
3728 
3729     transfer_time = s->total_time - s->setup_time;
3730     if (transfer_time) {
3731         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3732     }
3733 }
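/*
 * Annotation: worked example for the mbps formula above.  transfer_time is
 * in milliseconds, so (bytes * 8.0) / transfer_time yields bits/ms, i.e.
 * kbits/s, and the final / 1000 converts that to megabits/s.  For instance,
 * 10 GiB transferred in 20 s:
 *   (10 * 1024^3 * 8) / 20000 / 1000 ~= 4295 Mbps
 */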
3734 
3735 static void update_iteration_initial_status(MigrationState *s)
3736 {
3737     /*
3738      * Update these three fields at the same time to avoid mismatched info
3739      * leading to wrong speed calculations.
3740      */
3741     s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3742     s->iteration_initial_bytes = migration_total_bytes(s);
3743     s->iteration_initial_pages = ram_get_total_transferred_pages();
3744 }
3745 
3746 static void migration_update_counters(MigrationState *s,
3747                                       int64_t current_time)
3748 {
3749     uint64_t transferred, transferred_pages, time_spent;
3750     uint64_t current_bytes; /* bytes transferred since the beginning */
3751     double bandwidth;
3752 
3753     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3754         return;
3755     }
3756 
3757     current_bytes = migration_total_bytes(s);
3758     transferred = current_bytes - s->iteration_initial_bytes;
3759     time_spent = current_time - s->iteration_start_time;
3760     bandwidth = (double)transferred / time_spent;
3761     s->threshold_size = bandwidth * s->parameters.downtime_limit;
3762 
3763     s->mbps = (((double) transferred * 8.0) /
3764                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3765 
3766     transferred_pages = ram_get_total_transferred_pages() -
3767                             s->iteration_initial_pages;
3768     s->pages_per_second = (double) transferred_pages /
3769                              (((double) time_spent / 1000.0));
3770 
3771     /*
3772      * If we haven't sent anything, we don't want to
3773      * recalculate. 10000 is a small enough number for our purposes
3774      */
3775     if (ram_counters.dirty_pages_rate && transferred > 10000) {
3776         s->expected_downtime = ram_counters.remaining / bandwidth;
3777     }
3778 
3779     qemu_file_reset_rate_limit(s->to_dst_file);
3780 
3781     update_iteration_initial_status(s);
3782 
3783     trace_migrate_transferred(transferred, time_spent,
3784                               bandwidth, s->threshold_size);
3785 }
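/*
 * Annotation: bandwidth above is measured in bytes/ms and downtime_limit is
 * in ms, so threshold_size is "the number of bytes we could still send
 * within the allowed downtime".  E.g. at ~1 GiB/s (~1 MiB per ms) with the
 * default 300ms downtime_limit, switchover becomes possible once roughly
 * 300 MiB of dirty data remain.
 */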
3786 
3787 /* Migration thread iteration status */
3788 typedef enum {
3789     MIG_ITERATE_RESUME,         /* Resume current iteration */
3790     MIG_ITERATE_SKIP,           /* Skip current iteration */
3791     MIG_ITERATE_BREAK,          /* Break the loop */
3792 } MigIterateState;
3793 
3794 /*
3795  * Return MIG_ITERATE_RESUME to continue with the next iteration,
3796  * MIG_ITERATE_SKIP to skip it, or MIG_ITERATE_BREAK to stop the loop.
3797  */
3798 static MigIterateState migration_iteration_run(MigrationState *s)
3799 {
3800     uint64_t pend_pre, pend_compat, pend_post;
3801     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3802 
3803     qemu_savevm_state_pending_estimate(&pend_pre, &pend_compat, &pend_post);
3804     uint64_t pending_size = pend_pre + pend_compat + pend_post;
3805 
3806     trace_migrate_pending_estimate(pending_size,
3807                                    pend_pre, pend_compat, pend_post);
3808 
3809     if (pend_pre + pend_compat <= s->threshold_size) {
3810         qemu_savevm_state_pending_exact(&pend_pre, &pend_compat, &pend_post);
3811         pending_size = pend_pre + pend_compat + pend_post;
3812         trace_migrate_pending_exact(pending_size,
3813                                     pend_pre, pend_compat, pend_post);
3814     }
3815 
3816     if (!pending_size || pending_size < s->threshold_size) {
3817         trace_migration_thread_low_pending(pending_size);
3818         migration_completion(s);
3819         return MIG_ITERATE_BREAK;
3820     }
3821 
3822     /* Still a significant amount to transfer */
3823     if (!in_postcopy && pend_pre <= s->threshold_size &&
3824         qatomic_read(&s->start_postcopy)) {
3825         if (postcopy_start(s)) {
3826             error_report("%s: postcopy failed to start", __func__);
3827         }
3828         return MIG_ITERATE_SKIP;
3829     }
3830 
3831     /* Just another iteration step */
3832     qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
3833     return MIG_ITERATE_RESUME;
3834 }
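/*
 * Annotation: decision summary for the iteration above:
 *   pending_size < threshold_size                    -> complete  (BREAK)
 *   start_postcopy requested && pend_pre <= thresh   -> postcopy  (SKIP)
 *   otherwise                                        -> iterate   (RESUME)
 */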
3835 
3836 static void migration_iteration_finish(MigrationState *s)
3837 {
3838     /* If we enabled cpu throttling for auto-converge, turn it off. */
3839     cpu_throttle_stop();
3840 
3841     qemu_mutex_lock_iothread();
3842     switch (s->state) {
3843     case MIGRATION_STATUS_COMPLETED:
3844         migration_calculate_complete(s);
3845         runstate_set(RUN_STATE_POSTMIGRATE);
3846         break;
3847     case MIGRATION_STATUS_COLO:
3848         if (!migrate_colo_enabled()) {
3849             error_report("%s: critical error: calling COLO code without "
3850                          "COLO enabled", __func__);
3851         }
3852         migrate_start_colo_process(s);
3853         s->vm_was_running = true;
3854         /* Fallthrough */
3855     case MIGRATION_STATUS_FAILED:
3856     case MIGRATION_STATUS_CANCELLED:
3857     case MIGRATION_STATUS_CANCELLING:
3858         if (s->vm_was_running) {
3859             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
3860                 vm_start();
3861             }
3862         } else {
3863             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3864                 runstate_set(RUN_STATE_POSTMIGRATE);
3865             }
3866         }
3867         break;
3868 
3869     default:
3870         /* Should not reach here, but if so, forgive the VM. */
3871         error_report("%s: Unknown ending state %d", __func__, s->state);
3872         break;
3873     }
3874     migrate_fd_cleanup_schedule(s);
3875     qemu_mutex_unlock_iothread();
3876 }
3877 
3878 static void bg_migration_iteration_finish(MigrationState *s)
3879 {
3880     qemu_mutex_lock_iothread();
3881     switch (s->state) {
3882     case MIGRATION_STATUS_COMPLETED:
3883         migration_calculate_complete(s);
3884         break;
3885 
3886     case MIGRATION_STATUS_ACTIVE:
3887     case MIGRATION_STATUS_FAILED:
3888     case MIGRATION_STATUS_CANCELLED:
3889     case MIGRATION_STATUS_CANCELLING:
3890         break;
3891 
3892     default:
3893         /* Should not reach here, but if so, forgive the VM. */
3894         error_report("%s: Unknown ending state %d", __func__, s->state);
3895         break;
3896     }
3897 
3898     migrate_fd_cleanup_schedule(s);
3899     qemu_mutex_unlock_iothread();
3900 }
3901 
3902 /*
3903  * Return MIG_ITERATE_RESUME to continue with the next iteration, or
3904  * MIG_ITERATE_BREAK to stop the loop.
3905  */
3906 static MigIterateState bg_migration_iteration_run(MigrationState *s)
3907 {
3908     int res;
3909 
3910     res = qemu_savevm_state_iterate(s->to_dst_file, false);
3911     if (res > 0) {
3912         bg_migration_completion(s);
3913         return MIG_ITERATE_BREAK;
3914     }
3915 
3916     return MIG_ITERATE_RESUME;
3917 }
3918 
3919 void migration_make_urgent_request(void)
3920 {
3921     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3922 }
3923 
3924 void migration_consume_urgent_request(void)
3925 {
3926     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3927 }
3928 
3929 /* Returns true if the rate limiting was broken by an urgent request */
3930 bool migration_rate_limit(void)
3931 {
3932     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3933     MigrationState *s = migrate_get_current();
3934 
3935     bool urgent = false;
3936     migration_update_counters(s, now);
3937     if (qemu_file_rate_limit(s->to_dst_file)) {
3938 
3939         if (qemu_file_get_error(s->to_dst_file)) {
3940             return false;
3941         }
3942         /*
3943          * Wait for a delay to do rate limiting OR
3944          * something urgent to post the semaphore.
3945          */
3946         int ms = s->iteration_start_time + BUFFER_DELAY - now;
3947         trace_migration_rate_limit_pre(ms);
3948         if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
3949             /*
3950              * We were woken by one or more urgent things but
3951              * the timedwait will have consumed one of them.
3952              * The service routine for the urgent wake will dec
3953              * the semaphore itself for each item it consumes,
3954              * so add back the one we just consumed.
3955              */
3956             qemu_sem_post(&s->rate_limit_sem);
3957             urgent = true;
3958         }
3959         trace_migration_rate_limit_post(urgent);
3960     }
3961     return urgent;
3962 }
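/*
 * Annotation: rate limiting works in BUFFER_DELAY (100ms) windows; the
 * limit installed on the file is max_bandwidth / XFER_LIMIT_RATIO bytes per
 * window, which reproduces the requested bytes/s.  E.g. the MAX_THROTTLE
 * default of 128 MiB/s becomes 12.8 MiB per 100ms window, and the timedwait
 * above sleeps out whatever remains of the current window unless an urgent
 * request posts the semaphore first.
 */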
3963 
3964 /*
3965  * If failover devices are present, wait until they are completely
3966  * unplugged.
3967  */
3968 
3969 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
3970                                     int new_state)
3971 {
3972     if (qemu_savevm_state_guest_unplug_pending()) {
3973         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
3974 
3975         while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
3976                qemu_savevm_state_guest_unplug_pending()) {
3977             qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3978         }
3979         if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
3980             int timeout = 120; /* 30 seconds */
3981             /*
3982              * Migration has been cancelled, but as we have started an
3983              * unplug we must wait for it to complete before we can
3984              * plug the card back in.
3985              */
3986             while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
3987                 qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3988             }
3989             if (qemu_savevm_state_guest_unplug_pending() &&
3990                 !qtest_enabled()) {
3991                 warn_report("migration: partially unplugged device on "
3992                             "failure");
3993             }
3994         }
3995 
3996         migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
3997     } else {
3998         migrate_set_state(&s->state, old_state, new_state);
3999     }
4000 }
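/*
 * Annotation: the cancellation path above polls in 250ms steps with a
 * budget of 120 iterations, i.e. 120 * 250ms = 30s, which is where the
 * "30 seconds" note on the timeout variable comes from.
 */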
4001 
4002 /*
4003  * Master migration thread on the source VM.
4004  * It drives the migration and pumps the data down the outgoing channel.
4005  */
4006 static void *migration_thread(void *opaque)
4007 {
4008     MigrationState *s = opaque;
4009     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
4010     MigThrError thr_error;
4011     bool urgent = false;
4012 
4013     rcu_register_thread();
4014 
4015     object_ref(OBJECT(s));
4016     update_iteration_initial_status(s);
4017 
4018     qemu_savevm_state_header(s->to_dst_file);
4019 
4020     /*
4021      * If we opened the return path, we need to make sure dst has it
4022      * opened as well.
4023      */
4024     if (s->rp_state.rp_thread_created) {
4025         /* Now tell the dest that it should open its end so it can reply */
4026         qemu_savevm_send_open_return_path(s->to_dst_file);
4027 
4028         /* And do a ping that will make stuff easier to debug */
4029         qemu_savevm_send_ping(s->to_dst_file, 1);
4030     }
4031 
4032     if (migrate_postcopy()) {
4033         /*
4034          * Tell the destination that we *might* want to do postcopy later;
4035          * if the other end can't do postcopy it should fail now, nice and
4036          * early.
4037          */
4038         qemu_savevm_send_postcopy_advise(s->to_dst_file);
4039     }
4040 
4041     if (migrate_colo_enabled()) {
4042         /* Notify migration destination that we enable COLO */
4043         qemu_savevm_send_colo_enable(s->to_dst_file);
4044     }
4045 
4046     qemu_savevm_state_setup(s->to_dst_file);
4047 
4048     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
4049                                MIGRATION_STATUS_ACTIVE);
4050 
4051     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
4052 
4053     trace_migration_thread_setup_complete();
4054 
4055     while (migration_is_active(s)) {
4056         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
4057             MigIterateState iter_state = migration_iteration_run(s);
4058             if (iter_state == MIG_ITERATE_SKIP) {
4059                 continue;
4060             } else if (iter_state == MIG_ITERATE_BREAK) {
4061                 break;
4062             }
4063         }
4064 
4065         /*
4066          * Try to detect any kind of failure, and see whether we
4067          * should stop the migration now.
4068          */
4069         thr_error = migration_detect_error(s);
4070         if (thr_error == MIG_THR_ERR_FATAL) {
4071             /* Stop migration */
4072             break;
4073         } else if (thr_error == MIG_THR_ERR_RECOVERED) {
4074             /*
4075              * Just recovered from e.g. a network failure; reset all
4076              * the local variables. This is important to avoid
4077              * breaking the transferred_bytes and bandwidth calculations.
4078              */
4079             update_iteration_initial_status(s);
4080         }
4081 
4082         urgent = migration_rate_limit();
4083     }
4084 
4085     trace_migration_thread_after_loop();
4086     migration_iteration_finish(s);
4087     object_unref(OBJECT(s));
4088     rcu_unregister_thread();
4089     return NULL;
4090 }
4091 
4092 static void bg_migration_vm_start_bh(void *opaque)
4093 {
4094     MigrationState *s = opaque;
4095 
4096     qemu_bh_delete(s->vm_start_bh);
4097     s->vm_start_bh = NULL;
4098 
4099     vm_start();
4100     s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
4101 }
4102 
4103 /**
4104  * Background snapshot thread, based on live migration code.
4105  * This is an alternative implementation of the live migration mechanism,
4106  * introduced specifically to support background snapshots.
4107  *
4108  * It takes advantage of the userfault_fd write protection mechanism
4109  * introduced in the v5.7 kernel. Compared to the existing dirty page logging
4110  * migration, much less stream traffic is produced, resulting in smaller
4111  * snapshot images, simply because no page duplicates can get into the stream.
4112  *
4113  * Another key point is that the generated vmstate stream reflects the machine
4114  * state 'frozen' at the beginning of snapshot creation, whereas with the dirty
4115  * page logging mechanism the saved snapshot effectively is the state of the VM
4116  * at the end of the process.
4117  */
4118 static void *bg_migration_thread(void *opaque)
4119 {
4120     MigrationState *s = opaque;
4121     int64_t setup_start;
4122     MigThrError thr_error;
4123     QEMUFile *fb;
4124     bool early_fail = true;
4125 
4126     rcu_register_thread();
4127     object_ref(OBJECT(s));
4128 
4129     qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
4130 
4131     setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
4132     /*
4133      * We want to save the vmstate for the moment when migration has been
4134      * initiated, but we also want to save RAM content while the VM is running.
4135      * The RAM content should appear first in the vmstate. So, we first
4136      * stash the non-RAM part of the vmstate to the temporary buffer,
4137      * then write RAM part of the vmstate to the migration stream
4138      * with vCPUs running and, finally, write stashed non-RAM part of
4139      * the vmstate from the buffer to the migration stream.
4140      */
4141     s->bioc = qio_channel_buffer_new(512 * 1024);
4142     qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
4143     fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
4144     object_unref(OBJECT(s->bioc));
4145 
4146     update_iteration_initial_status(s);
4147 
4148     /*
4149      * Prepare for tracking memory writes with UFFD-WP - populate
4150      * RAM pages before protecting.
4151      */
4152 #ifdef __linux__
4153     ram_write_tracking_prepare();
4154 #endif
4155 
4156     qemu_savevm_state_header(s->to_dst_file);
4157     qemu_savevm_state_setup(s->to_dst_file);
4158 
4159     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
4160                                MIGRATION_STATUS_ACTIVE);
4161 
4162     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
4163 
4164     trace_migration_thread_setup_complete();
4165     s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
4166 
4167     qemu_mutex_lock_iothread();
4168 
4169     /*
4170      * If the VM is currently in a suspended state, then, to make a valid
4171      * runstate transition in vm_stop_force_state(), we need to wake it up.
4172      */
4173     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
4174     s->vm_was_running = runstate_is_running();
4175 
4176     if (global_state_store()) {
4177         goto fail;
4178     }
4179     /* Forcibly stop VM before saving state of vCPUs and devices */
4180     if (vm_stop_force_state(RUN_STATE_PAUSED)) {
4181         goto fail;
4182     }
4183     /*
4184      * Put vCPUs in sync with shadow context structures, then
4185      * save their state to the channel buffer along with the devices.
4186      */
4187     cpu_synchronize_all_states();
4188     if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
4189         goto fail;
4190     }
4191     /*
4192      * Since we are going to get non-iterable state data directly
4193      * from s->bioc->data, an explicit flush is needed here.
4194      */
4195     qemu_fflush(fb);
4196 
4197     /* Now initialize UFFD context and start tracking RAM writes */
4198     if (ram_write_tracking_start()) {
4199         goto fail;
4200     }
4201     early_fail = false;
4202 
4203     /*
4204      * Start the VM from a BH handler to avoid a write-fault lock here.
4205      * UFFD-WP protection for the whole of RAM is already enabled, so
4206      * calling the VM state change notifiers from vm_start() would initiate
4207      * writes to virtio VQ memory, which is in a write-protected region.
4208      */
4209     s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
4210     qemu_bh_schedule(s->vm_start_bh);
4211 
4212     qemu_mutex_unlock_iothread();
4213 
4214     while (migration_is_active(s)) {
4215         MigIterateState iter_state = bg_migration_iteration_run(s);
4216         if (iter_state == MIG_ITERATE_SKIP) {
4217             continue;
4218         } else if (iter_state == MIG_ITERATE_BREAK) {
4219             break;
4220         }
4221 
4222         /*
4223          * Try to detect any kind of failure, and see whether we
4224          * should stop the migration now.
4225          */
4226         thr_error = migration_detect_error(s);
4227         if (thr_error == MIG_THR_ERR_FATAL) {
4228             /* Stop migration */
4229             break;
4230         }
4231 
4232         migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
4233     }
4234 
4235     trace_migration_thread_after_loop();
4236 
4237 fail:
4238     if (early_fail) {
4239         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
4240                 MIGRATION_STATUS_FAILED);
4241         qemu_mutex_unlock_iothread();
4242     }
4243 
4244     bg_migration_iteration_finish(s);
4245 
4246     qemu_fclose(fb);
4247     object_unref(OBJECT(s));
4248     rcu_unregister_thread();
4249 
4250     return NULL;
4251 }
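/*
 * Annotation: the resulting background-snapshot stream layout, following
 * the buffering scheme described above (device state is staged in s->bioc
 * and appended by bg_migration_completion()):
 *
 *   | savevm header | RAM, written live under UFFD-WP | device state blob |
 */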
4252 
4253 void migrate_fd_connect(MigrationState *s, Error *error_in)
4254 {
4255     Error *local_err = NULL;
4256     int64_t rate_limit;
4257     bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
4258 
4259     /*
4260      * If there's a previous error, free it and prepare for another one.
4261      * Meanwhile, if migration completes successfully, there won't be an error
4262      * dumped when calling migrate_fd_cleanup().
4263      */
4264     migrate_error_free(s);
4265 
4266     s->expected_downtime = s->parameters.downtime_limit;
4267     if (resume) {
4268         assert(s->cleanup_bh);
4269     } else {
4270         assert(!s->cleanup_bh);
4271         s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
4272     }
4273     if (error_in) {
4274         migrate_fd_error(s, error_in);
4275         if (resume) {
4276             /*
4277              * Don't do cleanup for resume if channel is invalid, but only dump
4278              * the error.  We wait for another channel connect from the user.
4279              * The error_report still gives the HMP user a hint on what failed.
4280              * It's normally done in migrate_fd_cleanup(), but call it here
4281              * explicitly.
4282              */
4283             error_report_err(error_copy(s->error));
4284         } else {
4285             migrate_fd_cleanup(s);
4286         }
4287         return;
4288     }
4289 
4290     if (resume) {
4291         /* This is a resumed migration */
4292         rate_limit = s->parameters.max_postcopy_bandwidth /
4293             XFER_LIMIT_RATIO;
4294     } else {
4295         /* This is a brand new migration */
4296         rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
4297 
4298         /* Notify before starting migration thread */
4299         notifier_list_notify(&migration_state_notifiers, s);
4300     }
4301 
4302     qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
4303     qemu_file_set_blocking(s->to_dst_file, true);
4304 
4305     /*
4306      * Open the return path. For postcopy, it is used exclusively. For
4307      * precopy, QEMU uses the return path only if the user specified the
4308      * "return-path" capability.
4309      */
4310     if (migrate_postcopy_ram() || migrate_use_return_path()) {
4311         if (open_return_path_on_source(s, !resume)) {
4312             error_report("Unable to open return-path for postcopy");
4313             migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
4314             migrate_fd_cleanup(s);
4315             return;
4316         }
4317     }
4318 
4319     /* This needs to be done before resuming a postcopy */
4320     if (postcopy_preempt_setup(s, &local_err)) {
4321         error_report_err(local_err);
4322         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
4323                           MIGRATION_STATUS_FAILED);
4324         migrate_fd_cleanup(s);
4325         return;
4326     }
4327 
4328     if (resume) {
4329         /* Wakeup the main migration thread to do the recovery */
4330         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
4331                           MIGRATION_STATUS_POSTCOPY_RECOVER);
4332         qemu_sem_post(&s->postcopy_pause_sem);
4333         return;
4334     }
4335 
4336     if (multifd_save_setup(&local_err) != 0) {
4337         error_report_err(local_err);
4338         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
4339                           MIGRATION_STATUS_FAILED);
4340         migrate_fd_cleanup(s);
4341         return;
4342     }
4343 
4344     if (migrate_background_snapshot()) {
4345         qemu_thread_create(&s->thread, "bg_snapshot",
4346                 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
4347     } else {
4348         qemu_thread_create(&s->thread, "live_migration",
4349                 migration_thread, s, QEMU_THREAD_JOINABLE);
4350     }
4351     s->migration_thread_running = true;
4352 }
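/*
 * Annotation: worked example for the rate_limit computation above.  With
 * XFER_LIMIT_RATIO == 10 (i.e. 1000ms / BUFFER_DELAY), a max_bandwidth of
 * 1 GiB/s installs a limit of ~102.4 MiB per 100ms window, which
 * migration_rate_limit() then enforces window by window.
 */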
4353 
4354 void migration_global_dump(Monitor *mon)
4355 {
4356     MigrationState *ms = migrate_get_current();
4357 
4358     monitor_printf(mon, "globals:\n");
4359     monitor_printf(mon, "store-global-state: %s\n",
4360                    ms->store_global_state ? "on" : "off");
4361     monitor_printf(mon, "only-migratable: %s\n",
4362                    only_migratable ? "on" : "off");
4363     monitor_printf(mon, "send-configuration: %s\n",
4364                    ms->send_configuration ? "on" : "off");
4365     monitor_printf(mon, "send-section-footer: %s\n",
4366                    ms->send_section_footer ? "on" : "off");
4367     monitor_printf(mon, "decompress-error-check: %s\n",
4368                    ms->decompress_error_check ? "on" : "off");
4369     monitor_printf(mon, "clear-bitmap-shift: %u\n",
4370                    ms->clear_bitmap_shift);
4371 }
4372 
4373 #define DEFINE_PROP_MIG_CAP(name, x)             \
4374     DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
4375 
4376 static Property migration_properties[] = {
4377     DEFINE_PROP_BOOL("store-global-state", MigrationState,
4378                      store_global_state, true),
4379     DEFINE_PROP_BOOL("send-configuration", MigrationState,
4380                      send_configuration, true),
4381     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
4382                      send_section_footer, true),
4383     DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
4384                       decompress_error_check, true),
4385     DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
4386                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
4387 
4388     /* Migration parameters */
4389     DEFINE_PROP_UINT8("x-compress-level", MigrationState,
4390                       parameters.compress_level,
4391                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
4392     DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
4393                       parameters.compress_threads,
4394                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
4395     DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
4396                       parameters.compress_wait_thread, true),
4397     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
4398                       parameters.decompress_threads,
4399                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
4400     DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
4401                       parameters.throttle_trigger_threshold,
4402                       DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
4403     DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
4404                       parameters.cpu_throttle_initial,
4405                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
4406     DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
4407                       parameters.cpu_throttle_increment,
4408                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
4409     DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
4410                       parameters.cpu_throttle_tailslow, false),
4411     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
4412                       parameters.max_bandwidth, MAX_THROTTLE),
4413     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
4414                       parameters.downtime_limit,
4415                       DEFAULT_MIGRATE_SET_DOWNTIME),
4416     DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
4417                       parameters.x_checkpoint_delay,
4418                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
4419     DEFINE_PROP_UINT8("multifd-channels", MigrationState,
4420                       parameters.multifd_channels,
4421                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
4422     DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
4423                       parameters.multifd_compression,
4424                       DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
4425     DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
4426                       parameters.multifd_zlib_level,
4427                       DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
4428     DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
4429                       parameters.multifd_zstd_level,
4430                       DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
4431     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
4432                       parameters.xbzrle_cache_size,
4433                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
4434     DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
4435                       parameters.max_postcopy_bandwidth,
4436                       DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
4437     DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
4438                       parameters.max_cpu_throttle,
4439                       DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
4440     DEFINE_PROP_SIZE("announce-initial", MigrationState,
4441                       parameters.announce_initial,
4442                       DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
4443     DEFINE_PROP_SIZE("announce-max", MigrationState,
4444                       parameters.announce_max,
4445                       DEFAULT_MIGRATE_ANNOUNCE_MAX),
4446     DEFINE_PROP_SIZE("announce-rounds", MigrationState,
4447                       parameters.announce_rounds,
4448                       DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
4449     DEFINE_PROP_SIZE("announce-step", MigrationState,
4450                       parameters.announce_step,
4451                       DEFAULT_MIGRATE_ANNOUNCE_STEP),
4452     DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
4453     DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
4454     DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
4455 
4456     /* Migration capabilities */
4457     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
4458     DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
4459     DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
4460     DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
4461     DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
4462     DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
4463     DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
4464     DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
4465                         MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
4466     DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
4467     DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
4468     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
4469     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
4470     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
4471     DEFINE_PROP_MIG_CAP("x-background-snapshot",
4472             MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
4473 #ifdef CONFIG_LINUX
4474     DEFINE_PROP_MIG_CAP("x-zero-copy-send",
4475             MIGRATION_CAPABILITY_ZERO_COPY_SEND),
4476 #endif
4477 
4478     DEFINE_PROP_END_OF_LIST(),
4479 };
4480 
4481 static void migration_class_init(ObjectClass *klass, void *data)
4482 {
4483     DeviceClass *dc = DEVICE_CLASS(klass);
4484 
4485     dc->user_creatable = false;
4486     device_class_set_props(dc, migration_properties);
4487 }
4488 
4489 static void migration_instance_finalize(Object *obj)
4490 {
4491     MigrationState *ms = MIGRATION_OBJ(obj);
4492 
4493     qemu_mutex_destroy(&ms->error_mutex);
4494     qemu_mutex_destroy(&ms->qemu_file_lock);
4495     qemu_sem_destroy(&ms->wait_unplug_sem);
4496     qemu_sem_destroy(&ms->rate_limit_sem);
4497     qemu_sem_destroy(&ms->pause_sem);
4498     qemu_sem_destroy(&ms->postcopy_pause_sem);
4499     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
4500     qemu_sem_destroy(&ms->rp_state.rp_sem);
4501     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
4502     error_free(ms->error);
4503 }
4504 
4505 static void migration_instance_init(Object *obj)
4506 {
4507     MigrationState *ms = MIGRATION_OBJ(obj);
4508     MigrationParameters *params = &ms->parameters;
4509 
4510     ms->state = MIGRATION_STATUS_NONE;
4511     ms->mbps = -1;
4512     ms->pages_per_second = -1;
4513     qemu_sem_init(&ms->pause_sem, 0);
4514     qemu_mutex_init(&ms->error_mutex);
4515 
4516     params->tls_hostname = g_strdup("");
4517     params->tls_creds = g_strdup("");
4518 
4519     /* Set has_* up only for parameter checks */
4520     params->has_compress_level = true;
4521     params->has_compress_threads = true;
4522     params->has_compress_wait_thread = true;
4523     params->has_decompress_threads = true;
4524     params->has_throttle_trigger_threshold = true;
4525     params->has_cpu_throttle_initial = true;
4526     params->has_cpu_throttle_increment = true;
4527     params->has_cpu_throttle_tailslow = true;
4528     params->has_max_bandwidth = true;
4529     params->has_downtime_limit = true;
4530     params->has_x_checkpoint_delay = true;
4531     params->has_block_incremental = true;
4532     params->has_multifd_channels = true;
4533     params->has_multifd_compression = true;
4534     params->has_multifd_zlib_level = true;
4535     params->has_multifd_zstd_level = true;
4536     params->has_xbzrle_cache_size = true;
4537     params->has_max_postcopy_bandwidth = true;
4538     params->has_max_cpu_throttle = true;
4539     params->has_announce_initial = true;
4540     params->has_announce_max = true;
4541     params->has_announce_rounds = true;
4542     params->has_announce_step = true;
4543 
4544     qemu_sem_init(&ms->postcopy_pause_sem, 0);
4545     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
4546     qemu_sem_init(&ms->rp_state.rp_sem, 0);
4547     qemu_sem_init(&ms->rate_limit_sem, 0);
4548     qemu_sem_init(&ms->wait_unplug_sem, 0);
4549     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
4550     qemu_mutex_init(&ms->qemu_file_lock);
4551 }
4552 
4553 /*
4554  * Return true if the check passes, false otherwise. An error will be put
4555  * inside errp if provided.
4556  */
4557 static bool migration_object_check(MigrationState *ms, Error **errp)
4558 {
4559     MigrationCapabilityStatusList *head = NULL;
4560     /* Assuming all off */
4561     bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
4562     int i;
4563 
4564     if (!migrate_params_check(&ms->parameters, errp)) {
4565         return false;
4566     }
4567 
4568     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
4569         if (ms->enabled_capabilities[i]) {
4570             QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
4571         }
4572     }
4573 
4574     ret = migrate_caps_check(cap_list, head, errp);
4575 
4576     /* It works with head == NULL */
4577     qapi_free_MigrationCapabilityStatusList(head);
4578 
4579     return ret;
4580 }
4581 
4582 static const TypeInfo migration_type = {
4583     .name = TYPE_MIGRATION,
4584     /*
4585      * NOTE: TYPE_MIGRATION is not really a device, as the object is
4586      * not created using qdev_new(), it is not attached to the qdev
4587      * device tree, and it is never realized.
4588      *
4589      * TODO: Make this TYPE_OBJECT once QOM provides something like
4590      * TYPE_DEVICE's "-global" properties.
4591      */
4592     .parent = TYPE_DEVICE,
4593     .class_init = migration_class_init,
4594     .class_size = sizeof(MigrationClass),
4595     .instance_size = sizeof(MigrationState),
4596     .instance_init = migration_instance_init,
4597     .instance_finalize = migration_instance_finalize,
4598 };
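/*
 * Annotation: since TYPE_MIGRATION derives from TYPE_DEVICE (see the NOTE
 * above), the entries in migration_properties can be set from the command
 * line via -global.  A hedged usage sketch (the machine argument is just a
 * placeholder):
 *
 *   qemu-system-x86_64 -M q35 \
 *       -global migration.multifd-channels=4 \
 *       -global migration.xbzrle-cache-size=128M
 */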
4599 
4600 static void register_migration_types(void)
4601 {
4602     type_register_static(&migration_type);
4603 }
4604 
4605 type_init(register_migration_types);
4606