xref: /qemu/migration/migration.c (revision 22e3284f)
1 /*
2  * QEMU live migration
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "migration/blocker.h"
20 #include "exec.h"
21 #include "fd.h"
22 #include "socket.h"
23 #include "rdma.h"
24 #include "ram.h"
25 #include "migration/global_state.h"
26 #include "migration/misc.h"
27 #include "migration.h"
28 #include "savevm.h"
29 #include "qemu-file-channel.h"
30 #include "qemu-file.h"
31 #include "migration/vmstate.h"
32 #include "block/block.h"
33 #include "qapi/error.h"
34 #include "qapi/clone-visitor.h"
35 #include "qapi/qapi-visit-sockets.h"
36 #include "qapi/qapi-commands-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "qapi/qmp/qerror.h"
39 #include "qapi/qmp/qnull.h"
40 #include "qemu/rcu.h"
41 #include "block.h"
42 #include "postcopy-ram.h"
43 #include "qemu/thread.h"
44 #include "trace.h"
45 #include "exec/target_page.h"
46 #include "io/channel-buffer.h"
47 #include "migration/colo.h"
48 #include "hw/boards.h"
49 #include "monitor/monitor.h"
50 #include "net/announce.h"
51 
52 #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
53 
54 /* Amount of time to allocate to each "chunk" of bandwidth-throttled
55  * data. */
56 #define BUFFER_DELAY     100
57 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
58 
59 /* Time in milliseconds we are allowed to stop the source,
60  * for sending the last part */
61 #define DEFAULT_MIGRATE_SET_DOWNTIME 300
62 
63 /* Maximum migrate downtime set to 2000 seconds */
64 #define MAX_MIGRATE_DOWNTIME_SECONDS 2000
65 #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
66 
67 /* Default compression thread count */
68 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
69 /* Default decompression thread count, usually decompression is at
70  * least 4 times as fast as compression.*/
71 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
73 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
74 /* Define default autoconverge cpu throttle migration parameters */
75 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
76 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
77 #define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99
78 
79 /* Migration XBZRLE default cache size */
80 #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
81 
82 /* The delay time (in ms) between two COLO checkpoints */
83 #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
84 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
85 #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
86 
87 /* Background transfer rate for postcopy, 0 means unlimited, note
88  * that page requests can still exceed this limit.
89  */
90 #define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0
91 
92 /*
93  * Parameters for self_announce_delay giving a stream of RARP/ARP
94  * packets after migration.
95  */
96 #define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
97 #define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
98 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
99 #define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
100 
101 static NotifierList migration_state_notifiers =
102     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
103 
104 static bool deferred_incoming;
105 
/*
 * Message types sent on the return path, from the destination back to
 * the source.  Values are spelled out explicitly; MIG_RP_MSG_MAX is one
 * past the last valid type.
 */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID      = 0, /* Must be 0 */
    MIG_RP_MSG_SHUT         = 1, /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG         = 2, /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID = 3, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES    = 4, /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP  = 5, /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK   = 6, /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX          = 7
};
119 
120 /* When we add fault tolerance, we could have several
121    migrations at once.  For now we don't need to add
122    dynamic creation of migration */
123 
124 static MigrationState *current_migration;
125 static MigrationIncomingState *current_incoming;
126 
127 static bool migration_object_check(MigrationState *ms, Error **errp);
128 static int migration_maybe_pause(MigrationState *s,
129                                  int *current_active_state,
130                                  int new_state);
131 static void migrate_fd_cancel(MigrationState *s);
132 
133 void migration_object_init(void)
134 {
135     MachineState *ms = MACHINE(qdev_get_machine());
136     Error *err = NULL;
137 
138     /* This can only be called once. */
139     assert(!current_migration);
140     current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
141 
142     /*
143      * Init the migrate incoming object as well no matter whether
144      * we'll use it or not.
145      */
146     assert(!current_incoming);
147     current_incoming = g_new0(MigrationIncomingState, 1);
148     current_incoming->state = MIGRATION_STATUS_NONE;
149     current_incoming->postcopy_remote_fds =
150         g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
151     qemu_mutex_init(&current_incoming->rp_mutex);
152     qemu_event_init(&current_incoming->main_thread_load_event, false);
153     qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
154     qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
155 
156     init_dirty_bitmap_incoming_migration();
157 
158     if (!migration_object_check(current_migration, &err)) {
159         error_report_err(err);
160         exit(1);
161     }
162 
163     /*
164      * We cannot really do this in migration_instance_init() since at
165      * that time global properties are not yet applied, then this
166      * value will be definitely replaced by something else.
167      */
168     if (ms->enforce_config_section) {
169         current_migration->send_configuration = true;
170     }
171 }
172 
173 void migration_shutdown(void)
174 {
175     /*
176      * Cancel the current migration - that will (eventually)
177      * stop the migration using this structure
178      */
179     migrate_fd_cancel(current_migration);
180     object_unref(OBJECT(current_migration));
181 }
182 
183 /* For outgoing */
184 MigrationState *migrate_get_current(void)
185 {
186     /* This can only be called after the object created. */
187     assert(current_migration);
188     return current_migration;
189 }
190 
191 MigrationIncomingState *migration_incoming_get_current(void)
192 {
193     assert(current_incoming);
194     return current_incoming;
195 }
196 
197 void migration_incoming_state_destroy(void)
198 {
199     struct MigrationIncomingState *mis = migration_incoming_get_current();
200 
201     if (mis->to_src_file) {
202         /* Tell source that we are done */
203         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
204         qemu_fclose(mis->to_src_file);
205         mis->to_src_file = NULL;
206     }
207 
208     if (mis->from_src_file) {
209         qemu_fclose(mis->from_src_file);
210         mis->from_src_file = NULL;
211     }
212     if (mis->postcopy_remote_fds) {
213         g_array_free(mis->postcopy_remote_fds, TRUE);
214         mis->postcopy_remote_fds = NULL;
215     }
216 
217     qemu_event_reset(&mis->main_thread_load_event);
218 
219     if (mis->socket_address_list) {
220         qapi_free_SocketAddressList(mis->socket_address_list);
221         mis->socket_address_list = NULL;
222     }
223 }
224 
/* Emit a QAPI migration event for new_state, if events are enabled. */
static void migrate_generate_event(int new_state)
{
    if (!migrate_use_events()) {
        return;
    }

    qapi_event_send_migration(new_state);
}
231 
232 static bool migrate_late_block_activate(void)
233 {
234     MigrationState *s;
235 
236     s = migrate_get_current();
237 
238     return s->enabled_capabilities[
239         MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
240 }
241 
242 /*
243  * Called on -incoming with a defer: uri.
244  * The migration can be started later after any parameters have been
245  * changed.
246  */
247 static void deferred_incoming_migration(Error **errp)
248 {
249     if (deferred_incoming) {
250         error_setg(errp, "Incoming migration already deferred");
251     }
252     deferred_incoming = true;
253 }
254 
255 /*
256  * Send a message on the return channel back to the source
257  * of the migration.
258  */
259 static int migrate_send_rp_message(MigrationIncomingState *mis,
260                                    enum mig_rp_message_type message_type,
261                                    uint16_t len, void *data)
262 {
263     int ret = 0;
264 
265     trace_migrate_send_rp_message((int)message_type, len);
266     qemu_mutex_lock(&mis->rp_mutex);
267 
268     /*
269      * It's possible that the file handle got lost due to network
270      * failures.
271      */
272     if (!mis->to_src_file) {
273         ret = -EIO;
274         goto error;
275     }
276 
277     qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
278     qemu_put_be16(mis->to_src_file, len);
279     qemu_put_buffer(mis->to_src_file, data, len);
280     qemu_fflush(mis->to_src_file);
281 
282     /* It's possible that qemu file got error during sending */
283     ret = qemu_file_get_error(mis->to_src_file);
284 
285 error:
286     qemu_mutex_unlock(&mis->rp_mutex);
287     return ret;
288 }
289 
290 /* Request a range of pages from the source VM at the given
291  * start address.
292  *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
293  *           as the last request (a name must have been given previously)
294  *   Start: Address offset within the RB
295  *   Len: Length in bytes required - must be a multiple of pagesize
296  */
297 int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
298                               ram_addr_t start, size_t len)
299 {
300     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
301     size_t msglen = 12; /* start + len */
302     enum mig_rp_message_type msg_type;
303 
304     *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
305     *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
306 
307     if (rbname) {
308         int rbname_len = strlen(rbname);
309         assert(rbname_len < 256);
310 
311         bufc[msglen++] = rbname_len;
312         memcpy(bufc + msglen, rbname, rbname_len);
313         msglen += rbname_len;
314         msg_type = MIG_RP_MSG_REQ_PAGES_ID;
315     } else {
316         msg_type = MIG_RP_MSG_REQ_PAGES;
317     }
318 
319     return migrate_send_rp_message(mis, msg_type, msglen, bufc);
320 }
321 
/* Whether COLO is enabled for the incoming migration; off by default. */
static bool migration_colo_enabled;

/* Query the incoming-side COLO flag. */
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

/* Clear the incoming-side COLO flag. */
void migration_incoming_disable_colo(void)
{
    migration_colo_enabled = false;
}

/* Set the incoming-side COLO flag. */
void migration_incoming_enable_colo(void)
{
    migration_colo_enabled = true;
}
337 
338 void migrate_add_address(SocketAddress *address)
339 {
340     MigrationIncomingState *mis = migration_incoming_get_current();
341     SocketAddressList *addrs;
342 
343     addrs = g_new0(SocketAddressList, 1);
344     addrs->next = mis->socket_address_list;
345     mis->socket_address_list = addrs;
346     addrs->value = QAPI_CLONE(SocketAddress, address);
347 }
348 
349 void qemu_start_incoming_migration(const char *uri, Error **errp)
350 {
351     const char *p;
352 
353     qapi_event_send_migration(MIGRATION_STATUS_SETUP);
354     if (!strcmp(uri, "defer")) {
355         deferred_incoming_migration(errp);
356     } else if (strstart(uri, "tcp:", &p)) {
357         tcp_start_incoming_migration(p, errp);
358 #ifdef CONFIG_RDMA
359     } else if (strstart(uri, "rdma:", &p)) {
360         rdma_start_incoming_migration(p, errp);
361 #endif
362     } else if (strstart(uri, "exec:", &p)) {
363         exec_start_incoming_migration(p, errp);
364     } else if (strstart(uri, "unix:", &p)) {
365         unix_start_incoming_migration(p, errp);
366     } else if (strstart(uri, "fd:", &p)) {
367         fd_start_incoming_migration(p, errp);
368     } else {
369         error_setg(errp, "unknown migration protocol: %s", uri);
370     }
371 }
372 
373 static void process_incoming_migration_bh(void *opaque)
374 {
375     Error *local_err = NULL;
376     MigrationIncomingState *mis = opaque;
377 
378     /* If capability late_block_activate is set:
379      * Only fire up the block code now if we're going to restart the
380      * VM, else 'cont' will do it.
381      * This causes file locking to happen; so we don't want it to happen
382      * unless we really are starting the VM.
383      */
384     if (!migrate_late_block_activate() ||
385          (autostart && (!global_state_received() ||
386             global_state_get_runstate() == RUN_STATE_RUNNING))) {
387         /* Make sure all file formats flush their mutable metadata.
388          * If we get an error here, just don't restart the VM yet. */
389         bdrv_invalidate_cache_all(&local_err);
390         if (local_err) {
391             error_report_err(local_err);
392             local_err = NULL;
393             autostart = false;
394         }
395     }
396 
397     /*
398      * This must happen after all error conditions are dealt with and
399      * we're sure the VM is going to be running on this host.
400      */
401     qemu_announce_self(&mis->announce_timer, migrate_announce_params());
402 
403     if (multifd_load_cleanup(&local_err) != 0) {
404         error_report_err(local_err);
405         autostart = false;
406     }
407     /* If global state section was not received or we are in running
408        state, we need to obey autostart. Any other state is set with
409        runstate_set. */
410 
411     dirty_bitmap_mig_before_vm_start();
412 
413     if (!global_state_received() ||
414         global_state_get_runstate() == RUN_STATE_RUNNING) {
415         if (autostart) {
416             vm_start();
417         } else {
418             runstate_set(RUN_STATE_PAUSED);
419         }
420     } else if (migration_incoming_colo_enabled()) {
421         migration_incoming_disable_colo();
422         vm_start();
423     } else {
424         runstate_set(global_state_get_runstate());
425     }
426     /*
427      * This must happen after any state changes since as soon as an external
428      * observer sees this event they might start to prod at the VM assuming
429      * it's ready to use.
430      */
431     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
432                       MIGRATION_STATUS_COMPLETED);
433     qemu_bh_delete(mis->bh);
434     migration_incoming_state_destroy();
435 }
436 
437 static void process_incoming_migration_co(void *opaque)
438 {
439     MigrationIncomingState *mis = migration_incoming_get_current();
440     PostcopyState ps;
441     int ret;
442     Error *local_err = NULL;
443 
444     assert(mis->from_src_file);
445     mis->migration_incoming_co = qemu_coroutine_self();
446     mis->largest_page_size = qemu_ram_pagesize_largest();
447     postcopy_state_set(POSTCOPY_INCOMING_NONE);
448     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
449                       MIGRATION_STATUS_ACTIVE);
450     ret = qemu_loadvm_state(mis->from_src_file);
451 
452     ps = postcopy_state_get();
453     trace_process_incoming_migration_co_end(ret, ps);
454     if (ps != POSTCOPY_INCOMING_NONE) {
455         if (ps == POSTCOPY_INCOMING_ADVISE) {
456             /*
457              * Where a migration had postcopy enabled (and thus went to advise)
458              * but managed to complete within the precopy period, we can use
459              * the normal exit.
460              */
461             postcopy_ram_incoming_cleanup(mis);
462         } else if (ret >= 0) {
463             /*
464              * Postcopy was started, cleanup should happen at the end of the
465              * postcopy thread.
466              */
467             trace_process_incoming_migration_co_postcopy_end_main();
468             return;
469         }
470         /* Else if something went wrong then just fall out of the normal exit */
471     }
472 
473     /* we get COLO info, and know if we are in COLO mode */
474     if (!ret && migration_incoming_colo_enabled()) {
475         /* Make sure all file formats flush their mutable metadata */
476         bdrv_invalidate_cache_all(&local_err);
477         if (local_err) {
478             error_report_err(local_err);
479             goto fail;
480         }
481 
482         if (colo_init_ram_cache() < 0) {
483             error_report("Init ram cache failed");
484             goto fail;
485         }
486 
487         qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
488              colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
489         mis->have_colo_incoming_thread = true;
490         qemu_coroutine_yield();
491 
492         /* Wait checkpoint incoming thread exit before free resource */
493         qemu_thread_join(&mis->colo_incoming_thread);
494         /* We hold the global iothread lock, so it is safe here */
495         colo_release_ram_cache();
496     }
497 
498     if (ret < 0) {
499         error_report("load of migration failed: %s", strerror(-ret));
500         goto fail;
501     }
502     mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
503     qemu_bh_schedule(mis->bh);
504     mis->migration_incoming_co = NULL;
505     return;
506 fail:
507     local_err = NULL;
508     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
509                       MIGRATION_STATUS_FAILED);
510     qemu_fclose(mis->from_src_file);
511     if (multifd_load_cleanup(&local_err) != 0) {
512         error_report_err(local_err);
513     }
514     exit(EXIT_FAILURE);
515 }
516 
517 static void migration_incoming_setup(QEMUFile *f)
518 {
519     MigrationIncomingState *mis = migration_incoming_get_current();
520 
521     if (multifd_load_setup() != 0) {
522         /* We haven't been able to create multifd threads
523            nothing better to do */
524         exit(EXIT_FAILURE);
525     }
526 
527     if (!mis->from_src_file) {
528         mis->from_src_file = f;
529     }
530     qemu_file_set_blocking(f, false);
531 }
532 
533 void migration_incoming_process(void)
534 {
535     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
536     qemu_coroutine_enter(co);
537 }
538 
539 /* Returns true if recovered from a paused migration, otherwise false */
540 static bool postcopy_try_recover(QEMUFile *f)
541 {
542     MigrationIncomingState *mis = migration_incoming_get_current();
543 
544     if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
545         /* Resumed from a paused postcopy migration */
546 
547         mis->from_src_file = f;
548         /* Postcopy has standalone thread to do vm load */
549         qemu_file_set_blocking(f, true);
550 
551         /* Re-configure the return path */
552         mis->to_src_file = qemu_file_get_return_path(f);
553 
554         migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
555                           MIGRATION_STATUS_POSTCOPY_RECOVER);
556 
557         /*
558          * Here, we only wake up the main loading thread (while the
559          * fault thread will still be waiting), so that we can receive
560          * commands from source now, and answer it if needed. The
561          * fault thread will be woken up afterwards until we are sure
562          * that source is ready to reply to page requests.
563          */
564         qemu_sem_post(&mis->postcopy_pause_sem_dst);
565         return true;
566     }
567 
568     return false;
569 }
570 
571 void migration_fd_process_incoming(QEMUFile *f)
572 {
573     if (postcopy_try_recover(f)) {
574         return;
575     }
576 
577     migration_incoming_setup(f);
578     migration_incoming_process();
579 }
580 
581 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
582 {
583     MigrationIncomingState *mis = migration_incoming_get_current();
584     bool start_migration;
585 
586     if (!mis->from_src_file) {
587         /* The first connection (multifd may have multiple) */
588         QEMUFile *f = qemu_fopen_channel_input(ioc);
589 
590         /* If it's a recovery, we're done */
591         if (postcopy_try_recover(f)) {
592             return;
593         }
594 
595         migration_incoming_setup(f);
596 
597         /*
598          * Common migration only needs one channel, so we can start
599          * right now.  Multifd needs more than one channel, we wait.
600          */
601         start_migration = !migrate_use_multifd();
602     } else {
603         Error *local_err = NULL;
604         /* Multiple connections */
605         assert(migrate_use_multifd());
606         start_migration = multifd_recv_new_channel(ioc, &local_err);
607         if (local_err) {
608             error_propagate(errp, local_err);
609             return;
610         }
611     }
612 
613     if (start_migration) {
614         migration_incoming_process();
615     }
616 }
617 
618 /**
619  * @migration_has_all_channels: We have received all channels that we need
620  *
621  * Returns true when we have got connections to all the channels that
622  * we need for migration.
623  */
624 bool migration_has_all_channels(void)
625 {
626     MigrationIncomingState *mis = migration_incoming_get_current();
627     bool all_channels;
628 
629     all_channels = multifd_recv_all_channels_created();
630 
631     return all_channels && mis->from_src_file != NULL;
632 }
633 
634 /*
635  * Send a 'SHUT' message on the return channel with the given value
636  * to indicate that we've finished with the RP.  Non-0 value indicates
637  * error.
638  */
639 void migrate_send_rp_shut(MigrationIncomingState *mis,
640                           uint32_t value)
641 {
642     uint32_t buf;
643 
644     buf = cpu_to_be32(value);
645     migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
646 }
647 
648 /*
649  * Send a 'PONG' message on the return channel with the given value
650  * (normally in response to a 'PING')
651  */
652 void migrate_send_rp_pong(MigrationIncomingState *mis,
653                           uint32_t value)
654 {
655     uint32_t buf;
656 
657     buf = cpu_to_be32(value);
658     migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
659 }
660 
661 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
662                                  char *block_name)
663 {
664     char buf[512];
665     int len;
666     int64_t res;
667 
668     /*
669      * First, we send the header part. It contains only the len of
670      * idstr, and the idstr itself.
671      */
672     len = strlen(block_name);
673     buf[0] = len;
674     memcpy(buf + 1, block_name, len);
675 
676     if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
677         error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
678                      __func__);
679         return;
680     }
681 
682     migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);
683 
684     /*
685      * Next, we dump the received bitmap to the stream.
686      *
687      * TODO: currently we are safe since we are the only one that is
688      * using the to_src_file handle (fault thread is still paused),
689      * and it's ok even not taking the mutex. However the best way is
690      * to take the lock before sending the message header, and release
691      * the lock after sending the bitmap.
692      */
693     qemu_mutex_lock(&mis->rp_mutex);
694     res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
695     qemu_mutex_unlock(&mis->rp_mutex);
696 
697     trace_migrate_send_rp_recv_bitmap(block_name, res);
698 }
699 
700 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
701 {
702     uint32_t buf;
703 
704     buf = cpu_to_be32(value);
705     migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
706 }
707 
708 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
709 {
710     MigrationCapabilityStatusList *head = NULL;
711     MigrationCapabilityStatusList *caps;
712     MigrationState *s = migrate_get_current();
713     int i;
714 
715     caps = NULL; /* silence compiler warning */
716     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
717 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
718         if (i == MIGRATION_CAPABILITY_BLOCK) {
719             continue;
720         }
721 #endif
722         if (head == NULL) {
723             head = g_malloc0(sizeof(*caps));
724             caps = head;
725         } else {
726             caps->next = g_malloc0(sizeof(*caps));
727             caps = caps->next;
728         }
729         caps->value =
730             g_malloc(sizeof(*caps->value));
731         caps->value->capability = i;
732         caps->value->state = s->enabled_capabilities[i];
733     }
734 
735     return head;
736 }
737 
738 MigrationParameters *qmp_query_migrate_parameters(Error **errp)
739 {
740     MigrationParameters *params;
741     MigrationState *s = migrate_get_current();
742 
743     /* TODO use QAPI_CLONE() instead of duplicating it inline */
744     params = g_malloc0(sizeof(*params));
745     params->has_compress_level = true;
746     params->compress_level = s->parameters.compress_level;
747     params->has_compress_threads = true;
748     params->compress_threads = s->parameters.compress_threads;
749     params->has_compress_wait_thread = true;
750     params->compress_wait_thread = s->parameters.compress_wait_thread;
751     params->has_decompress_threads = true;
752     params->decompress_threads = s->parameters.decompress_threads;
753     params->has_cpu_throttle_initial = true;
754     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
755     params->has_cpu_throttle_increment = true;
756     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
757     params->has_tls_creds = true;
758     params->tls_creds = g_strdup(s->parameters.tls_creds);
759     params->has_tls_hostname = true;
760     params->tls_hostname = g_strdup(s->parameters.tls_hostname);
761     params->has_max_bandwidth = true;
762     params->max_bandwidth = s->parameters.max_bandwidth;
763     params->has_downtime_limit = true;
764     params->downtime_limit = s->parameters.downtime_limit;
765     params->has_x_checkpoint_delay = true;
766     params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
767     params->has_block_incremental = true;
768     params->block_incremental = s->parameters.block_incremental;
769     params->has_x_multifd_channels = true;
770     params->x_multifd_channels = s->parameters.x_multifd_channels;
771     params->has_x_multifd_page_count = true;
772     params->x_multifd_page_count = s->parameters.x_multifd_page_count;
773     params->has_xbzrle_cache_size = true;
774     params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
775     params->has_max_postcopy_bandwidth = true;
776     params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
777     params->has_max_cpu_throttle = true;
778     params->max_cpu_throttle = s->parameters.max_cpu_throttle;
779     params->has_announce_initial = true;
780     params->announce_initial = s->parameters.announce_initial;
781     params->has_announce_max = true;
782     params->announce_max = s->parameters.announce_max;
783     params->has_announce_rounds = true;
784     params->announce_rounds = s->parameters.announce_rounds;
785     params->has_announce_step = true;
786     params->announce_step = s->parameters.announce_step;
787 
788     return params;
789 }
790 
791 AnnounceParameters *migrate_announce_params(void)
792 {
793     static AnnounceParameters ap;
794 
795     MigrationState *s = migrate_get_current();
796 
797     ap.initial = s->parameters.announce_initial;
798     ap.max = s->parameters.announce_max;
799     ap.rounds = s->parameters.announce_rounds;
800     ap.step = s->parameters.announce_step;
801 
802     return &ap;
803 }
804 
805 /*
806  * Return true if we're already in the middle of a migration
807  * (i.e. any of the active or setup states)
808  */
809 bool migration_is_setup_or_active(int state)
810 {
811     switch (state) {
812     case MIGRATION_STATUS_ACTIVE:
813     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
814     case MIGRATION_STATUS_POSTCOPY_PAUSED:
815     case MIGRATION_STATUS_POSTCOPY_RECOVER:
816     case MIGRATION_STATUS_SETUP:
817     case MIGRATION_STATUS_PRE_SWITCHOVER:
818     case MIGRATION_STATUS_DEVICE:
819         return true;
820 
821     default:
822         return false;
823 
824     }
825 }
826 
827 static void populate_ram_info(MigrationInfo *info, MigrationState *s)
828 {
829     info->has_ram = true;
830     info->ram = g_malloc0(sizeof(*info->ram));
831     info->ram->transferred = ram_counters.transferred;
832     info->ram->total = ram_bytes_total();
833     info->ram->duplicate = ram_counters.duplicate;
834     /* legacy value.  It is not used anymore */
835     info->ram->skipped = 0;
836     info->ram->normal = ram_counters.normal;
837     info->ram->normal_bytes = ram_counters.normal *
838         qemu_target_page_size();
839     info->ram->mbps = s->mbps;
840     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
841     info->ram->postcopy_requests = ram_counters.postcopy_requests;
842     info->ram->page_size = qemu_target_page_size();
843     info->ram->multifd_bytes = ram_counters.multifd_bytes;
844     info->ram->pages_per_second = s->pages_per_second;
845 
846     if (migrate_use_xbzrle()) {
847         info->has_xbzrle_cache = true;
848         info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
849         info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
850         info->xbzrle_cache->bytes = xbzrle_counters.bytes;
851         info->xbzrle_cache->pages = xbzrle_counters.pages;
852         info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
853         info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
854         info->xbzrle_cache->overflow = xbzrle_counters.overflow;
855     }
856 
857     if (migrate_use_compression()) {
858         info->has_compression = true;
859         info->compression = g_malloc0(sizeof(*info->compression));
860         info->compression->pages = compression_counters.pages;
861         info->compression->busy = compression_counters.busy;
862         info->compression->busy_rate = compression_counters.busy_rate;
863         info->compression->compressed_size =
864                                     compression_counters.compressed_size;
865         info->compression->compression_rate =
866                                     compression_counters.compression_rate;
867     }
868 
869     if (cpu_throttle_active()) {
870         info->has_cpu_throttle_percentage = true;
871         info->cpu_throttle_percentage = cpu_throttle_get_percentage();
872     }
873 
874     if (s->state != MIGRATION_STATUS_COMPLETED) {
875         info->ram->remaining = ram_bytes_remaining();
876         info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
877     }
878 }
879 
880 static void populate_disk_info(MigrationInfo *info)
881 {
882     if (blk_mig_active()) {
883         info->has_disk = true;
884         info->disk = g_malloc0(sizeof(*info->disk));
885         info->disk->transferred = blk_mig_bytes_transferred();
886         info->disk->remaining = blk_mig_bytes_remaining();
887         info->disk->total = blk_mig_bytes_total();
888     }
889 }
890 
891 static void fill_source_migration_info(MigrationInfo *info)
892 {
893     MigrationState *s = migrate_get_current();
894 
895     switch (s->state) {
896     case MIGRATION_STATUS_NONE:
897         /* no migration has happened ever */
898         /* do not overwrite destination migration status */
899         return;
900         break;
901     case MIGRATION_STATUS_SETUP:
902         info->has_status = true;
903         info->has_total_time = false;
904         break;
905     case MIGRATION_STATUS_ACTIVE:
906     case MIGRATION_STATUS_CANCELLING:
907     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
908     case MIGRATION_STATUS_PRE_SWITCHOVER:
909     case MIGRATION_STATUS_DEVICE:
910     case MIGRATION_STATUS_POSTCOPY_PAUSED:
911     case MIGRATION_STATUS_POSTCOPY_RECOVER:
912          /* TODO add some postcopy stats */
913         info->has_status = true;
914         info->has_total_time = true;
915         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
916             - s->start_time;
917         info->has_expected_downtime = true;
918         info->expected_downtime = s->expected_downtime;
919         info->has_setup_time = true;
920         info->setup_time = s->setup_time;
921 
922         populate_ram_info(info, s);
923         populate_disk_info(info);
924         break;
925     case MIGRATION_STATUS_COLO:
926         info->has_status = true;
927         /* TODO: display COLO specific information (checkpoint info etc.) */
928         break;
929     case MIGRATION_STATUS_COMPLETED:
930         info->has_status = true;
931         info->has_total_time = true;
932         info->total_time = s->total_time;
933         info->has_downtime = true;
934         info->downtime = s->downtime;
935         info->has_setup_time = true;
936         info->setup_time = s->setup_time;
937 
938         populate_ram_info(info, s);
939         break;
940     case MIGRATION_STATUS_FAILED:
941         info->has_status = true;
942         if (s->error) {
943             info->has_error_desc = true;
944             info->error_desc = g_strdup(error_get_pretty(s->error));
945         }
946         break;
947     case MIGRATION_STATUS_CANCELLED:
948         info->has_status = true;
949         break;
950     }
951     info->status = s->state;
952 }
953 
954 /**
955  * @migration_caps_check - check capability validity
956  *
957  * @cap_list: old capability list, array of bool
958  * @params: new capabilities to be applied soon
959  * @errp: set *errp if the check failed, with reason
960  *
961  * Returns true if check passed, otherwise false.
962  */
963 static bool migrate_caps_check(bool *cap_list,
964                                MigrationCapabilityStatusList *params,
965                                Error **errp)
966 {
967     MigrationCapabilityStatusList *cap;
968     bool old_postcopy_cap;
969     MigrationIncomingState *mis = migration_incoming_get_current();
970 
971     old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
972 
973     for (cap = params; cap; cap = cap->next) {
974         cap_list[cap->value->capability] = cap->value->state;
975     }
976 
977 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
978     if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
979         error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
980                    "block migration");
981         error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
982         return false;
983     }
984 #endif
985 
986 #ifndef CONFIG_REPLICATION
987     if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
988         error_setg(errp, "QEMU compiled without replication module"
989                    " can't enable COLO");
990         error_append_hint(errp, "Please enable replication before COLO.\n");
991         return false;
992     }
993 #endif
994 
995     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
996         if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
997             /* The decompression threads asynchronously write into RAM
998              * rather than use the atomic copies needed to avoid
999              * userfaulting.  It should be possible to fix the decompression
1000              * threads for compatibility in future.
1001              */
1002             error_setg(errp, "Postcopy is not currently compatible "
1003                        "with compression");
1004             return false;
1005         }
1006 
1007         /* This check is reasonably expensive, so only when it's being
1008          * set the first time, also it's only the destination that needs
1009          * special support.
1010          */
1011         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
1012             !postcopy_ram_supported_by_host(mis)) {
1013             /* postcopy_ram_supported_by_host will have emitted a more
1014              * detailed message
1015              */
1016             error_setg(errp, "Postcopy is not supported");
1017             return false;
1018         }
1019 
1020         if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
1021             error_setg(errp, "Postcopy is not compatible with ignore-shared");
1022             return false;
1023         }
1024     }
1025 
1026     return true;
1027 }
1028 
1029 static void fill_destination_migration_info(MigrationInfo *info)
1030 {
1031     MigrationIncomingState *mis = migration_incoming_get_current();
1032 
1033     if (mis->socket_address_list) {
1034         info->has_socket_address = true;
1035         info->socket_address =
1036             QAPI_CLONE(SocketAddressList, mis->socket_address_list);
1037     }
1038 
1039     switch (mis->state) {
1040     case MIGRATION_STATUS_NONE:
1041         return;
1042         break;
1043     case MIGRATION_STATUS_SETUP:
1044     case MIGRATION_STATUS_CANCELLING:
1045     case MIGRATION_STATUS_CANCELLED:
1046     case MIGRATION_STATUS_ACTIVE:
1047     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1048     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1049     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1050     case MIGRATION_STATUS_FAILED:
1051     case MIGRATION_STATUS_COLO:
1052         info->has_status = true;
1053         break;
1054     case MIGRATION_STATUS_COMPLETED:
1055         info->has_status = true;
1056         fill_destination_postcopy_migration_info(info);
1057         break;
1058     }
1059     info->status = mis->state;
1060 }
1061 
1062 MigrationInfo *qmp_query_migrate(Error **errp)
1063 {
1064     MigrationInfo *info = g_malloc0(sizeof(*info));
1065 
1066     fill_destination_migration_info(info);
1067     fill_source_migration_info(info);
1068 
1069     return info;
1070 }
1071 
1072 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
1073                                   Error **errp)
1074 {
1075     MigrationState *s = migrate_get_current();
1076     MigrationCapabilityStatusList *cap;
1077     bool cap_list[MIGRATION_CAPABILITY__MAX];
1078 
1079     if (migration_is_setup_or_active(s->state)) {
1080         error_setg(errp, QERR_MIGRATION_ACTIVE);
1081         return;
1082     }
1083 
1084     memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
1085     if (!migrate_caps_check(cap_list, params, errp)) {
1086         return;
1087     }
1088 
1089     for (cap = params; cap; cap = cap->next) {
1090         s->enabled_capabilities[cap->value->capability] = cap->value->state;
1091     }
1092 }
1093 
1094 /*
1095  * Check whether the parameters are valid. Error will be put into errp
1096  * (if provided). Return true if valid, otherwise false.
1097  */
1098 static bool migrate_params_check(MigrationParameters *params, Error **errp)
1099 {
1100     if (params->has_compress_level &&
1101         (params->compress_level > 9)) {
1102         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
1103                    "is invalid, it should be in the range of 0 to 9");
1104         return false;
1105     }
1106 
1107     if (params->has_compress_threads && (params->compress_threads < 1)) {
1108         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1109                    "compress_threads",
1110                    "is invalid, it should be in the range of 1 to 255");
1111         return false;
1112     }
1113 
1114     if (params->has_decompress_threads && (params->decompress_threads < 1)) {
1115         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1116                    "decompress_threads",
1117                    "is invalid, it should be in the range of 1 to 255");
1118         return false;
1119     }
1120 
1121     if (params->has_cpu_throttle_initial &&
1122         (params->cpu_throttle_initial < 1 ||
1123          params->cpu_throttle_initial > 99)) {
1124         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1125                    "cpu_throttle_initial",
1126                    "an integer in the range of 1 to 99");
1127         return false;
1128     }
1129 
1130     if (params->has_cpu_throttle_increment &&
1131         (params->cpu_throttle_increment < 1 ||
1132          params->cpu_throttle_increment > 99)) {
1133         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1134                    "cpu_throttle_increment",
1135                    "an integer in the range of 1 to 99");
1136         return false;
1137     }
1138 
1139     if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
1140         error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
1141                          " range of 0 to %zu bytes/second", SIZE_MAX);
1142         return false;
1143     }
1144 
1145     if (params->has_downtime_limit &&
1146         (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
1147         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
1148                          "the range of 0 to %d milliseconds",
1149                          MAX_MIGRATE_DOWNTIME);
1150         return false;
1151     }
1152 
1153     /* x_checkpoint_delay is now always positive */
1154 
1155     if (params->has_x_multifd_channels && (params->x_multifd_channels < 1)) {
1156         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1157                    "multifd_channels",
1158                    "is invalid, it should be in the range of 1 to 255");
1159         return false;
1160     }
1161     if (params->has_x_multifd_page_count &&
1162         (params->x_multifd_page_count < 1 ||
1163          params->x_multifd_page_count > 10000)) {
1164         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1165                    "multifd_page_count",
1166                    "is invalid, it should be in the range of 1 to 10000");
1167         return false;
1168     }
1169 
1170     if (params->has_xbzrle_cache_size &&
1171         (params->xbzrle_cache_size < qemu_target_page_size() ||
1172          !is_power_of_2(params->xbzrle_cache_size))) {
1173         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1174                    "xbzrle_cache_size",
1175                    "is invalid, it should be bigger than target page size"
1176                    " and a power of two");
1177         return false;
1178     }
1179 
1180     if (params->has_max_cpu_throttle &&
1181         (params->max_cpu_throttle < params->cpu_throttle_initial ||
1182          params->max_cpu_throttle > 99)) {
1183         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1184                    "max_cpu_throttle",
1185                    "an integer in the range of cpu_throttle_initial to 99");
1186         return false;
1187     }
1188 
1189     if (params->has_announce_initial &&
1190         params->announce_initial > 100000) {
1191         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1192                    "announce_initial",
1193                    "is invalid, it must be less than 100000 ms");
1194         return false;
1195     }
1196     if (params->has_announce_max &&
1197         params->announce_max > 100000) {
1198         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1199                    "announce_max",
1200                    "is invalid, it must be less than 100000 ms");
1201        return false;
1202     }
1203     if (params->has_announce_rounds &&
1204         params->announce_rounds > 1000) {
1205         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1206                    "announce_rounds",
1207                    "is invalid, it must be in the range of 0 to 1000");
1208        return false;
1209     }
1210     if (params->has_announce_step &&
1211         (params->announce_step < 1 ||
1212         params->announce_step > 10000)) {
1213         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1214                    "announce_step",
1215                    "is invalid, it must be in the range of 1 to 10000 ms");
1216        return false;
1217     }
1218     return true;
1219 }
1220 
1221 static void migrate_params_test_apply(MigrateSetParameters *params,
1222                                       MigrationParameters *dest)
1223 {
1224     *dest = migrate_get_current()->parameters;
1225 
1226     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1227 
1228     if (params->has_compress_level) {
1229         dest->compress_level = params->compress_level;
1230     }
1231 
1232     if (params->has_compress_threads) {
1233         dest->compress_threads = params->compress_threads;
1234     }
1235 
1236     if (params->has_compress_wait_thread) {
1237         dest->compress_wait_thread = params->compress_wait_thread;
1238     }
1239 
1240     if (params->has_decompress_threads) {
1241         dest->decompress_threads = params->decompress_threads;
1242     }
1243 
1244     if (params->has_cpu_throttle_initial) {
1245         dest->cpu_throttle_initial = params->cpu_throttle_initial;
1246     }
1247 
1248     if (params->has_cpu_throttle_increment) {
1249         dest->cpu_throttle_increment = params->cpu_throttle_increment;
1250     }
1251 
1252     if (params->has_tls_creds) {
1253         assert(params->tls_creds->type == QTYPE_QSTRING);
1254         dest->tls_creds = g_strdup(params->tls_creds->u.s);
1255     }
1256 
1257     if (params->has_tls_hostname) {
1258         assert(params->tls_hostname->type == QTYPE_QSTRING);
1259         dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
1260     }
1261 
1262     if (params->has_max_bandwidth) {
1263         dest->max_bandwidth = params->max_bandwidth;
1264     }
1265 
1266     if (params->has_downtime_limit) {
1267         dest->downtime_limit = params->downtime_limit;
1268     }
1269 
1270     if (params->has_x_checkpoint_delay) {
1271         dest->x_checkpoint_delay = params->x_checkpoint_delay;
1272     }
1273 
1274     if (params->has_block_incremental) {
1275         dest->block_incremental = params->block_incremental;
1276     }
1277     if (params->has_x_multifd_channels) {
1278         dest->x_multifd_channels = params->x_multifd_channels;
1279     }
1280     if (params->has_x_multifd_page_count) {
1281         dest->x_multifd_page_count = params->x_multifd_page_count;
1282     }
1283     if (params->has_xbzrle_cache_size) {
1284         dest->xbzrle_cache_size = params->xbzrle_cache_size;
1285     }
1286     if (params->has_max_postcopy_bandwidth) {
1287         dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1288     }
1289     if (params->has_max_cpu_throttle) {
1290         dest->max_cpu_throttle = params->max_cpu_throttle;
1291     }
1292     if (params->has_announce_initial) {
1293         dest->announce_initial = params->announce_initial;
1294     }
1295     if (params->has_announce_max) {
1296         dest->announce_max = params->announce_max;
1297     }
1298     if (params->has_announce_rounds) {
1299         dest->announce_rounds = params->announce_rounds;
1300     }
1301     if (params->has_announce_step) {
1302         dest->announce_step = params->announce_step;
1303     }
1304 }
1305 
1306 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
1307 {
1308     MigrationState *s = migrate_get_current();
1309 
1310     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1311 
1312     if (params->has_compress_level) {
1313         s->parameters.compress_level = params->compress_level;
1314     }
1315 
1316     if (params->has_compress_threads) {
1317         s->parameters.compress_threads = params->compress_threads;
1318     }
1319 
1320     if (params->has_compress_wait_thread) {
1321         s->parameters.compress_wait_thread = params->compress_wait_thread;
1322     }
1323 
1324     if (params->has_decompress_threads) {
1325         s->parameters.decompress_threads = params->decompress_threads;
1326     }
1327 
1328     if (params->has_cpu_throttle_initial) {
1329         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
1330     }
1331 
1332     if (params->has_cpu_throttle_increment) {
1333         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
1334     }
1335 
1336     if (params->has_tls_creds) {
1337         g_free(s->parameters.tls_creds);
1338         assert(params->tls_creds->type == QTYPE_QSTRING);
1339         s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
1340     }
1341 
1342     if (params->has_tls_hostname) {
1343         g_free(s->parameters.tls_hostname);
1344         assert(params->tls_hostname->type == QTYPE_QSTRING);
1345         s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
1346     }
1347 
1348     if (params->has_max_bandwidth) {
1349         s->parameters.max_bandwidth = params->max_bandwidth;
1350         if (s->to_dst_file) {
1351             qemu_file_set_rate_limit(s->to_dst_file,
1352                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
1353         }
1354     }
1355 
1356     if (params->has_downtime_limit) {
1357         s->parameters.downtime_limit = params->downtime_limit;
1358     }
1359 
1360     if (params->has_x_checkpoint_delay) {
1361         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
1362         if (migration_in_colo_state()) {
1363             colo_checkpoint_notify(s);
1364         }
1365     }
1366 
1367     if (params->has_block_incremental) {
1368         s->parameters.block_incremental = params->block_incremental;
1369     }
1370     if (params->has_x_multifd_channels) {
1371         s->parameters.x_multifd_channels = params->x_multifd_channels;
1372     }
1373     if (params->has_x_multifd_page_count) {
1374         s->parameters.x_multifd_page_count = params->x_multifd_page_count;
1375     }
1376     if (params->has_xbzrle_cache_size) {
1377         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
1378         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
1379     }
1380     if (params->has_max_postcopy_bandwidth) {
1381         s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1382     }
1383     if (params->has_max_cpu_throttle) {
1384         s->parameters.max_cpu_throttle = params->max_cpu_throttle;
1385     }
1386     if (params->has_announce_initial) {
1387         s->parameters.announce_initial = params->announce_initial;
1388     }
1389     if (params->has_announce_max) {
1390         s->parameters.announce_max = params->announce_max;
1391     }
1392     if (params->has_announce_rounds) {
1393         s->parameters.announce_rounds = params->announce_rounds;
1394     }
1395     if (params->has_announce_step) {
1396         s->parameters.announce_step = params->announce_step;
1397     }
1398 }
1399 
1400 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1401 {
1402     MigrationParameters tmp;
1403 
1404     /* TODO Rewrite "" to null instead */
1405     if (params->has_tls_creds
1406         && params->tls_creds->type == QTYPE_QNULL) {
1407         qobject_unref(params->tls_creds->u.n);
1408         params->tls_creds->type = QTYPE_QSTRING;
1409         params->tls_creds->u.s = strdup("");
1410     }
1411     /* TODO Rewrite "" to null instead */
1412     if (params->has_tls_hostname
1413         && params->tls_hostname->type == QTYPE_QNULL) {
1414         qobject_unref(params->tls_hostname->u.n);
1415         params->tls_hostname->type = QTYPE_QSTRING;
1416         params->tls_hostname->u.s = strdup("");
1417     }
1418 
1419     migrate_params_test_apply(params, &tmp);
1420 
1421     if (!migrate_params_check(&tmp, errp)) {
1422         /* Invalid parameter */
1423         return;
1424     }
1425 
1426     migrate_params_apply(params, errp);
1427 }
1428 
1429 
1430 void qmp_migrate_start_postcopy(Error **errp)
1431 {
1432     MigrationState *s = migrate_get_current();
1433 
1434     if (!migrate_postcopy()) {
1435         error_setg(errp, "Enable postcopy with migrate_set_capability before"
1436                          " the start of migration");
1437         return;
1438     }
1439 
1440     if (s->state == MIGRATION_STATUS_NONE) {
1441         error_setg(errp, "Postcopy must be started after migration has been"
1442                          " started");
1443         return;
1444     }
1445     /*
1446      * we don't error if migration has finished since that would be racy
1447      * with issuing this command.
1448      */
1449     atomic_set(&s->start_postcopy, true);
1450 }
1451 
1452 /* shared migration helpers */
1453 
1454 void migrate_set_state(int *state, int old_state, int new_state)
1455 {
1456     assert(new_state < MIGRATION_STATUS__MAX);
1457     if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
1458         trace_migrate_set_state(MigrationStatus_str(new_state));
1459         migrate_generate_event(new_state);
1460     }
1461 }
1462 
1463 static MigrationCapabilityStatusList *migrate_cap_add(
1464     MigrationCapabilityStatusList *list,
1465     MigrationCapability index,
1466     bool state)
1467 {
1468     MigrationCapabilityStatusList *cap;
1469 
1470     cap = g_new0(MigrationCapabilityStatusList, 1);
1471     cap->value = g_new0(MigrationCapabilityStatus, 1);
1472     cap->value->capability = index;
1473     cap->value->state = state;
1474     cap->next = list;
1475 
1476     return cap;
1477 }
1478 
1479 void migrate_set_block_enabled(bool value, Error **errp)
1480 {
1481     MigrationCapabilityStatusList *cap;
1482 
1483     cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
1484     qmp_migrate_set_capabilities(cap, errp);
1485     qapi_free_MigrationCapabilityStatusList(cap);
1486 }
1487 
1488 static void migrate_set_block_incremental(MigrationState *s, bool value)
1489 {
1490     s->parameters.block_incremental = value;
1491 }
1492 
1493 static void block_cleanup_parameters(MigrationState *s)
1494 {
1495     if (s->must_remove_block_options) {
1496         /* setting to false can never fail */
1497         migrate_set_block_enabled(false, &error_abort);
1498         migrate_set_block_incremental(s, false);
1499         s->must_remove_block_options = false;
1500     }
1501 }
1502 
1503 static void migrate_fd_cleanup(void *opaque)
1504 {
1505     MigrationState *s = opaque;
1506 
1507     qemu_bh_delete(s->cleanup_bh);
1508     s->cleanup_bh = NULL;
1509 
1510     qemu_savevm_state_cleanup();
1511 
1512     if (s->to_dst_file) {
1513         QEMUFile *tmp;
1514 
1515         trace_migrate_fd_cleanup();
1516         qemu_mutex_unlock_iothread();
1517         if (s->migration_thread_running) {
1518             qemu_thread_join(&s->thread);
1519             s->migration_thread_running = false;
1520         }
1521         qemu_mutex_lock_iothread();
1522 
1523         multifd_save_cleanup();
1524         qemu_mutex_lock(&s->qemu_file_lock);
1525         tmp = s->to_dst_file;
1526         s->to_dst_file = NULL;
1527         qemu_mutex_unlock(&s->qemu_file_lock);
1528         /*
1529          * Close the file handle without the lock to make sure the
1530          * critical section won't block for long.
1531          */
1532         qemu_fclose(tmp);
1533     }
1534 
1535     assert((s->state != MIGRATION_STATUS_ACTIVE) &&
1536            (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
1537 
1538     if (s->state == MIGRATION_STATUS_CANCELLING) {
1539         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
1540                           MIGRATION_STATUS_CANCELLED);
1541     }
1542 
1543     if (s->error) {
1544         /* It is used on info migrate.  We can't free it */
1545         error_report_err(error_copy(s->error));
1546     }
1547     notifier_list_notify(&migration_state_notifiers, s);
1548     block_cleanup_parameters(s);
1549 }
1550 
1551 void migrate_set_error(MigrationState *s, const Error *error)
1552 {
1553     qemu_mutex_lock(&s->error_mutex);
1554     if (!s->error) {
1555         s->error = error_copy(error);
1556     }
1557     qemu_mutex_unlock(&s->error_mutex);
1558 }
1559 
1560 void migrate_fd_error(MigrationState *s, const Error *error)
1561 {
1562     trace_migrate_fd_error(error_get_pretty(error));
1563     assert(s->to_dst_file == NULL);
1564     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1565                       MIGRATION_STATUS_FAILED);
1566     migrate_set_error(s, error);
1567 }
1568 
1569 static void migrate_fd_cancel(MigrationState *s)
1570 {
1571     int old_state ;
1572     QEMUFile *f = migrate_get_current()->to_dst_file;
1573     trace_migrate_fd_cancel();
1574 
1575     if (s->rp_state.from_dst_file) {
1576         /* shutdown the rp socket, so causing the rp thread to shutdown */
1577         qemu_file_shutdown(s->rp_state.from_dst_file);
1578     }
1579 
1580     do {
1581         old_state = s->state;
1582         if (!migration_is_setup_or_active(old_state)) {
1583             break;
1584         }
1585         /* If the migration is paused, kick it out of the pause */
1586         if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
1587             qemu_sem_post(&s->pause_sem);
1588         }
1589         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
1590     } while (s->state != MIGRATION_STATUS_CANCELLING);
1591 
1592     /*
1593      * If we're unlucky the migration code might be stuck somewhere in a
1594      * send/write while the network has failed and is waiting to timeout;
1595      * if we've got shutdown(2) available then we can force it to quit.
1596      * The outgoing qemu file gets closed in migrate_fd_cleanup that is
1597      * called in a bh, so there is no race against this cancel.
1598      */
1599     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
1600         qemu_file_shutdown(f);
1601     }
1602     if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
1603         Error *local_err = NULL;
1604 
1605         bdrv_invalidate_cache_all(&local_err);
1606         if (local_err) {
1607             error_report_err(local_err);
1608         } else {
1609             s->block_inactive = false;
1610         }
1611     }
1612 }
1613 
1614 void add_migration_state_change_notifier(Notifier *notify)
1615 {
1616     notifier_list_add(&migration_state_notifiers, notify);
1617 }
1618 
1619 void remove_migration_state_change_notifier(Notifier *notify)
1620 {
1621     notifier_remove(notify);
1622 }
1623 
1624 bool migration_in_setup(MigrationState *s)
1625 {
1626     return s->state == MIGRATION_STATUS_SETUP;
1627 }
1628 
1629 bool migration_has_finished(MigrationState *s)
1630 {
1631     return s->state == MIGRATION_STATUS_COMPLETED;
1632 }
1633 
1634 bool migration_has_failed(MigrationState *s)
1635 {
1636     return (s->state == MIGRATION_STATUS_CANCELLED ||
1637             s->state == MIGRATION_STATUS_FAILED);
1638 }
1639 
1640 bool migration_in_postcopy(void)
1641 {
1642     MigrationState *s = migrate_get_current();
1643 
1644     return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1645 }
1646 
1647 bool migration_in_postcopy_after_devices(MigrationState *s)
1648 {
1649     return migration_in_postcopy() && s->postcopy_after_devices;
1650 }
1651 
1652 bool migration_is_idle(void)
1653 {
1654     MigrationState *s = migrate_get_current();
1655 
1656     switch (s->state) {
1657     case MIGRATION_STATUS_NONE:
1658     case MIGRATION_STATUS_CANCELLED:
1659     case MIGRATION_STATUS_COMPLETED:
1660     case MIGRATION_STATUS_FAILED:
1661         return true;
1662     case MIGRATION_STATUS_SETUP:
1663     case MIGRATION_STATUS_CANCELLING:
1664     case MIGRATION_STATUS_ACTIVE:
1665     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1666     case MIGRATION_STATUS_COLO:
1667     case MIGRATION_STATUS_PRE_SWITCHOVER:
1668     case MIGRATION_STATUS_DEVICE:
1669         return false;
1670     case MIGRATION_STATUS__MAX:
1671         g_assert_not_reached();
1672     }
1673 
1674     return false;
1675 }
1676 
1677 void migrate_init(MigrationState *s)
1678 {
1679     /*
1680      * Reinitialise all migration state, except
1681      * parameters/capabilities that the user set, and
1682      * locks.
1683      */
1684     s->bytes_xfer = 0;
1685     s->xfer_limit = 0;
1686     s->cleanup_bh = 0;
1687     s->to_dst_file = NULL;
1688     s->state = MIGRATION_STATUS_NONE;
1689     s->rp_state.from_dst_file = NULL;
1690     s->rp_state.error = false;
1691     s->mbps = 0.0;
1692     s->pages_per_second = 0.0;
1693     s->downtime = 0;
1694     s->expected_downtime = 0;
1695     s->setup_time = 0;
1696     s->start_postcopy = false;
1697     s->postcopy_after_devices = false;
1698     s->migration_thread_running = false;
1699     error_free(s->error);
1700     s->error = NULL;
1701 
1702     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1703 
1704     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1705     s->total_time = 0;
1706     s->vm_was_running = false;
1707     s->iteration_initial_bytes = 0;
1708     s->threshold_size = 0;
1709 }
1710 
1711 static GSList *migration_blockers;
1712 
1713 int migrate_add_blocker(Error *reason, Error **errp)
1714 {
1715     if (migrate_get_current()->only_migratable) {
1716         error_propagate_prepend(errp, error_copy(reason),
1717                                 "disallowing migration blocker "
1718                                 "(--only_migratable) for: ");
1719         return -EACCES;
1720     }
1721 
1722     if (migration_is_idle()) {
1723         migration_blockers = g_slist_prepend(migration_blockers, reason);
1724         return 0;
1725     }
1726 
1727     error_propagate_prepend(errp, error_copy(reason),
1728                             "disallowing migration blocker "
1729                             "(migration in progress) for: ");
1730     return -EBUSY;
1731 }
1732 
1733 void migrate_del_blocker(Error *reason)
1734 {
1735     migration_blockers = g_slist_remove(migration_blockers, reason);
1736 }
1737 
1738 void qmp_migrate_incoming(const char *uri, Error **errp)
1739 {
1740     Error *local_err = NULL;
1741     static bool once = true;
1742 
1743     if (!deferred_incoming) {
1744         error_setg(errp, "For use with '-incoming defer'");
1745         return;
1746     }
1747     if (!once) {
1748         error_setg(errp, "The incoming migration has already been started");
1749     }
1750 
1751     qemu_start_incoming_migration(uri, &local_err);
1752 
1753     if (local_err) {
1754         error_propagate(errp, local_err);
1755         return;
1756     }
1757 
1758     once = false;
1759 }
1760 
1761 void qmp_migrate_recover(const char *uri, Error **errp)
1762 {
1763     MigrationIncomingState *mis = migration_incoming_get_current();
1764 
1765     if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1766         error_setg(errp, "Migrate recover can only be run "
1767                    "when postcopy is paused.");
1768         return;
1769     }
1770 
1771     if (atomic_cmpxchg(&mis->postcopy_recover_triggered,
1772                        false, true) == true) {
1773         error_setg(errp, "Migrate recovery is triggered already");
1774         return;
1775     }
1776 
1777     /*
1778      * Note that this call will never start a real migration; it will
1779      * only re-setup the migration stream and poke existing migration
1780      * to continue using that newly established channel.
1781      */
1782     qemu_start_incoming_migration(uri, errp);
1783 }
1784 
1785 void qmp_migrate_pause(Error **errp)
1786 {
1787     MigrationState *ms = migrate_get_current();
1788     MigrationIncomingState *mis = migration_incoming_get_current();
1789     int ret;
1790 
1791     if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1792         /* Source side, during postcopy */
1793         qemu_mutex_lock(&ms->qemu_file_lock);
1794         ret = qemu_file_shutdown(ms->to_dst_file);
1795         qemu_mutex_unlock(&ms->qemu_file_lock);
1796         if (ret) {
1797             error_setg(errp, "Failed to pause source migration");
1798         }
1799         return;
1800     }
1801 
1802     if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1803         ret = qemu_file_shutdown(mis->from_src_file);
1804         if (ret) {
1805             error_setg(errp, "Failed to pause destination migration");
1806         }
1807         return;
1808     }
1809 
1810     error_setg(errp, "migrate-pause is currently only supported "
1811                "during postcopy-active state");
1812 }
1813 
1814 bool migration_is_blocked(Error **errp)
1815 {
1816     if (qemu_savevm_state_blocked(errp)) {
1817         return true;
1818     }
1819 
1820     if (migration_blockers) {
1821         error_propagate(errp, error_copy(migration_blockers->data));
1822         return true;
1823     }
1824 
1825     return false;
1826 }
1827 
1828 /* Returns true if continue to migrate, or false if error detected */
1829 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
1830                             bool resume, Error **errp)
1831 {
1832     Error *local_err = NULL;
1833 
1834     if (resume) {
1835         if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1836             error_setg(errp, "Cannot resume if there is no "
1837                        "paused migration");
1838             return false;
1839         }
1840 
1841         /*
1842          * Postcopy recovery won't work well with release-ram
1843          * capability since release-ram will drop the page buffer as
1844          * long as the page is put into the send buffer.  So if there
1845          * is a network failure happened, any page buffers that have
1846          * not yet reached the destination VM but have already been
1847          * sent from the source VM will be lost forever.  Let's refuse
1848          * the client from resuming such a postcopy migration.
1849          * Luckily release-ram was designed to only be used when src
1850          * and destination VMs are on the same host, so it should be
1851          * fine.
1852          */
1853         if (migrate_release_ram()) {
1854             error_setg(errp, "Postcopy recovery cannot work "
1855                        "when release-ram capability is set");
1856             return false;
1857         }
1858 
1859         /* This is a resume, skip init status */
1860         return true;
1861     }
1862 
1863     if (migration_is_setup_or_active(s->state) ||
1864         s->state == MIGRATION_STATUS_CANCELLING ||
1865         s->state == MIGRATION_STATUS_COLO) {
1866         error_setg(errp, QERR_MIGRATION_ACTIVE);
1867         return false;
1868     }
1869 
1870     if (runstate_check(RUN_STATE_INMIGRATE)) {
1871         error_setg(errp, "Guest is waiting for an incoming migration");
1872         return false;
1873     }
1874 
1875     if (migration_is_blocked(errp)) {
1876         return false;
1877     }
1878 
1879     if (blk || blk_inc) {
1880         if (migrate_use_block() || migrate_use_block_incremental()) {
1881             error_setg(errp, "Command options are incompatible with "
1882                        "current migration capabilities");
1883             return false;
1884         }
1885         migrate_set_block_enabled(true, &local_err);
1886         if (local_err) {
1887             error_propagate(errp, local_err);
1888             return false;
1889         }
1890         s->must_remove_block_options = true;
1891     }
1892 
1893     if (blk_inc) {
1894         migrate_set_block_incremental(s, true);
1895     }
1896 
1897     migrate_init(s);
1898 
1899     return true;
1900 }
1901 
1902 void qmp_migrate(const char *uri, bool has_blk, bool blk,
1903                  bool has_inc, bool inc, bool has_detach, bool detach,
1904                  bool has_resume, bool resume, Error **errp)
1905 {
1906     Error *local_err = NULL;
1907     MigrationState *s = migrate_get_current();
1908     const char *p;
1909 
1910     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
1911                          has_resume && resume, errp)) {
1912         /* Error detected, put into errp */
1913         return;
1914     }
1915 
1916     if (strstart(uri, "tcp:", &p)) {
1917         tcp_start_outgoing_migration(s, p, &local_err);
1918 #ifdef CONFIG_RDMA
1919     } else if (strstart(uri, "rdma:", &p)) {
1920         rdma_start_outgoing_migration(s, p, &local_err);
1921 #endif
1922     } else if (strstart(uri, "exec:", &p)) {
1923         exec_start_outgoing_migration(s, p, &local_err);
1924     } else if (strstart(uri, "unix:", &p)) {
1925         unix_start_outgoing_migration(s, p, &local_err);
1926     } else if (strstart(uri, "fd:", &p)) {
1927         fd_start_outgoing_migration(s, p, &local_err);
1928     } else {
1929         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
1930                    "a valid migration protocol");
1931         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1932                           MIGRATION_STATUS_FAILED);
1933         block_cleanup_parameters(s);
1934         return;
1935     }
1936 
1937     if (local_err) {
1938         migrate_fd_error(s, local_err);
1939         error_propagate(errp, local_err);
1940         return;
1941     }
1942 }
1943 
1944 void qmp_migrate_cancel(Error **errp)
1945 {
1946     migrate_fd_cancel(migrate_get_current());
1947 }
1948 
1949 void qmp_migrate_continue(MigrationStatus state, Error **errp)
1950 {
1951     MigrationState *s = migrate_get_current();
1952     if (s->state != state) {
1953         error_setg(errp,  "Migration not in expected state: %s",
1954                    MigrationStatus_str(s->state));
1955         return;
1956     }
1957     qemu_sem_post(&s->pause_sem);
1958 }
1959 
1960 void qmp_migrate_set_cache_size(int64_t value, Error **errp)
1961 {
1962     MigrateSetParameters p = {
1963         .has_xbzrle_cache_size = true,
1964         .xbzrle_cache_size = value,
1965     };
1966 
1967     qmp_migrate_set_parameters(&p, errp);
1968 }
1969 
1970 int64_t qmp_query_migrate_cache_size(Error **errp)
1971 {
1972     return migrate_xbzrle_cache_size();
1973 }
1974 
1975 void qmp_migrate_set_speed(int64_t value, Error **errp)
1976 {
1977     MigrateSetParameters p = {
1978         .has_max_bandwidth = true,
1979         .max_bandwidth = value,
1980     };
1981 
1982     qmp_migrate_set_parameters(&p, errp);
1983 }
1984 
1985 void qmp_migrate_set_downtime(double value, Error **errp)
1986 {
1987     if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
1988         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
1989                          "the range of 0 to %d seconds",
1990                          MAX_MIGRATE_DOWNTIME_SECONDS);
1991         return;
1992     }
1993 
1994     value *= 1000; /* Convert to milliseconds */
1995     value = MAX(0, MIN(INT64_MAX, value));
1996 
1997     MigrateSetParameters p = {
1998         .has_downtime_limit = true,
1999         .downtime_limit = value,
2000     };
2001 
2002     qmp_migrate_set_parameters(&p, errp);
2003 }
2004 
2005 bool migrate_release_ram(void)
2006 {
2007     MigrationState *s;
2008 
2009     s = migrate_get_current();
2010 
2011     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
2012 }
2013 
2014 bool migrate_postcopy_ram(void)
2015 {
2016     MigrationState *s;
2017 
2018     s = migrate_get_current();
2019 
2020     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
2021 }
2022 
2023 bool migrate_postcopy(void)
2024 {
2025     return migrate_postcopy_ram() || migrate_dirty_bitmaps();
2026 }
2027 
2028 bool migrate_auto_converge(void)
2029 {
2030     MigrationState *s;
2031 
2032     s = migrate_get_current();
2033 
2034     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
2035 }
2036 
2037 bool migrate_zero_blocks(void)
2038 {
2039     MigrationState *s;
2040 
2041     s = migrate_get_current();
2042 
2043     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
2044 }
2045 
2046 bool migrate_postcopy_blocktime(void)
2047 {
2048     MigrationState *s;
2049 
2050     s = migrate_get_current();
2051 
2052     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
2053 }
2054 
2055 bool migrate_use_compression(void)
2056 {
2057     MigrationState *s;
2058 
2059     s = migrate_get_current();
2060 
2061     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
2062 }
2063 
2064 int migrate_compress_level(void)
2065 {
2066     MigrationState *s;
2067 
2068     s = migrate_get_current();
2069 
2070     return s->parameters.compress_level;
2071 }
2072 
2073 int migrate_compress_threads(void)
2074 {
2075     MigrationState *s;
2076 
2077     s = migrate_get_current();
2078 
2079     return s->parameters.compress_threads;
2080 }
2081 
2082 int migrate_compress_wait_thread(void)
2083 {
2084     MigrationState *s;
2085 
2086     s = migrate_get_current();
2087 
2088     return s->parameters.compress_wait_thread;
2089 }
2090 
2091 int migrate_decompress_threads(void)
2092 {
2093     MigrationState *s;
2094 
2095     s = migrate_get_current();
2096 
2097     return s->parameters.decompress_threads;
2098 }
2099 
2100 bool migrate_dirty_bitmaps(void)
2101 {
2102     MigrationState *s;
2103 
2104     s = migrate_get_current();
2105 
2106     return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
2107 }
2108 
2109 bool migrate_ignore_shared(void)
2110 {
2111     MigrationState *s;
2112 
2113     s = migrate_get_current();
2114 
2115     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
2116 }
2117 
2118 bool migrate_use_events(void)
2119 {
2120     MigrationState *s;
2121 
2122     s = migrate_get_current();
2123 
2124     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
2125 }
2126 
2127 bool migrate_use_multifd(void)
2128 {
2129     MigrationState *s;
2130 
2131     s = migrate_get_current();
2132 
2133     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
2134 }
2135 
2136 bool migrate_pause_before_switchover(void)
2137 {
2138     MigrationState *s;
2139 
2140     s = migrate_get_current();
2141 
2142     return s->enabled_capabilities[
2143         MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
2144 }
2145 
2146 int migrate_multifd_channels(void)
2147 {
2148     MigrationState *s;
2149 
2150     s = migrate_get_current();
2151 
2152     return s->parameters.x_multifd_channels;
2153 }
2154 
2155 int migrate_multifd_page_count(void)
2156 {
2157     MigrationState *s;
2158 
2159     s = migrate_get_current();
2160 
2161     return s->parameters.x_multifd_page_count;
2162 }
2163 
2164 int migrate_use_xbzrle(void)
2165 {
2166     MigrationState *s;
2167 
2168     s = migrate_get_current();
2169 
2170     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
2171 }
2172 
2173 int64_t migrate_xbzrle_cache_size(void)
2174 {
2175     MigrationState *s;
2176 
2177     s = migrate_get_current();
2178 
2179     return s->parameters.xbzrle_cache_size;
2180 }
2181 
2182 static int64_t migrate_max_postcopy_bandwidth(void)
2183 {
2184     MigrationState *s;
2185 
2186     s = migrate_get_current();
2187 
2188     return s->parameters.max_postcopy_bandwidth;
2189 }
2190 
2191 bool migrate_use_block(void)
2192 {
2193     MigrationState *s;
2194 
2195     s = migrate_get_current();
2196 
2197     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
2198 }
2199 
2200 bool migrate_use_return_path(void)
2201 {
2202     MigrationState *s;
2203 
2204     s = migrate_get_current();
2205 
2206     return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
2207 }
2208 
2209 bool migrate_use_block_incremental(void)
2210 {
2211     MigrationState *s;
2212 
2213     s = migrate_get_current();
2214 
2215     return s->parameters.block_incremental;
2216 }
2217 
2218 /* migration thread support */
/*
 * Something bad happened to the return path (RP) stream: record it by
 * setting the error flag in the return-path state of @s.
 *
 * Nothing is printed here; the caller shall print or trace something to
 * indicate why.
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}
2227 
/*
 * Per-message description of the return path commands, indexed by the
 * MIG_RP_MSG_* message type.
 *
 * @len:  payload length a message of this type must have, or -1 when
 *        the length is variable
 * @name: message name used in error reports
 */
static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};
2241 
2242 /*
2243  * Process a request for pages received on the return path,
2244  * We're allowed to send more than requested (e.g. to round to our page size)
2245  * and we don't need to send pages that have already been sent.
2246  */
2247 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
2248                                        ram_addr_t start, size_t len)
2249 {
2250     long our_host_ps = getpagesize();
2251 
2252     trace_migrate_handle_rp_req_pages(rbname, start, len);
2253 
2254     /*
2255      * Since we currently insist on matching page sizes, just sanity check
2256      * we're being asked for whole host pages.
2257      */
2258     if (start & (our_host_ps-1) ||
2259        (len & (our_host_ps-1))) {
2260         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
2261                      " len: %zd", __func__, start, len);
2262         mark_source_rp_bad(ms);
2263         return;
2264     }
2265 
2266     if (ram_save_queue_pages(rbname, start, len)) {
2267         mark_source_rp_bad(ms);
2268     }
2269 }
2270 
/*
 * Block the return path thread until postcopy_pause_rp_sem is posted.
 *
 * The return value tells the caller whether to retry (true) or to quit
 * (false); as written, this function always returns true once the wait
 * has completed.
 */
static bool postcopy_pause_return_path_thread(MigrationState *s)
{
    trace_postcopy_pause_return_path();

    /* Sleep here until some other code posts the semaphore */
    qemu_sem_wait(&s->postcopy_pause_rp_sem);

    trace_postcopy_pause_return_path_continued();

    return true;
}
2282 
2283 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
2284 {
2285     RAMBlock *block = qemu_ram_block_by_name(block_name);
2286 
2287     if (!block) {
2288         error_report("%s: invalid block name '%s'", __func__, block_name);
2289         return -EINVAL;
2290     }
2291 
2292     /* Fetch the received bitmap and refresh the dirty bitmap */
2293     return ram_dirty_bitmap_reload(s, block);
2294 }
2295 
2296 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
2297 {
2298     trace_source_return_path_thread_resume_ack(value);
2299 
2300     if (value != MIGRATION_RESUME_ACK_VALUE) {
2301         error_report("%s: illegal resume_ack value %"PRIu32,
2302                      __func__, value);
2303         return -1;
2304     }
2305 
2306     /* Now both sides are active. */
2307     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2308                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2309 
2310     /* Notify send thread that time to continue send pages */
2311     qemu_sem_post(&s->rp_state.rp_sem);
2312 
2313     return 0;
2314 }
2315 
2316 /*
2317  * Handles messages sent on the return path towards the source VM
2318  *
2319  */
2320 static void *source_return_path_thread(void *opaque)
2321 {
2322     MigrationState *ms = opaque;
2323     QEMUFile *rp = ms->rp_state.from_dst_file;
2324     uint16_t header_len, header_type;
2325     uint8_t buf[512];
2326     uint32_t tmp32, sibling_error;
2327     ram_addr_t start = 0; /* =0 to silence warning */
2328     size_t  len = 0, expected_len;
2329     int res;
2330 
2331     trace_source_return_path_thread_entry();
2332     rcu_register_thread();
2333 
2334 retry:
2335     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
2336            migration_is_setup_or_active(ms->state)) {
2337         trace_source_return_path_thread_loop_top();
2338         header_type = qemu_get_be16(rp);
2339         header_len = qemu_get_be16(rp);
2340 
2341         if (qemu_file_get_error(rp)) {
2342             mark_source_rp_bad(ms);
2343             goto out;
2344         }
2345 
2346         if (header_type >= MIG_RP_MSG_MAX ||
2347             header_type == MIG_RP_MSG_INVALID) {
2348             error_report("RP: Received invalid message 0x%04x length 0x%04x",
2349                     header_type, header_len);
2350             mark_source_rp_bad(ms);
2351             goto out;
2352         }
2353 
2354         if ((rp_cmd_args[header_type].len != -1 &&
2355             header_len != rp_cmd_args[header_type].len) ||
2356             header_len > sizeof(buf)) {
2357             error_report("RP: Received '%s' message (0x%04x) with"
2358                     "incorrect length %d expecting %zu",
2359                     rp_cmd_args[header_type].name, header_type, header_len,
2360                     (size_t)rp_cmd_args[header_type].len);
2361             mark_source_rp_bad(ms);
2362             goto out;
2363         }
2364 
2365         /* We know we've got a valid header by this point */
2366         res = qemu_get_buffer(rp, buf, header_len);
2367         if (res != header_len) {
2368             error_report("RP: Failed reading data for message 0x%04x"
2369                          " read %d expected %d",
2370                          header_type, res, header_len);
2371             mark_source_rp_bad(ms);
2372             goto out;
2373         }
2374 
2375         /* OK, we have the message and the data */
2376         switch (header_type) {
2377         case MIG_RP_MSG_SHUT:
2378             sibling_error = ldl_be_p(buf);
2379             trace_source_return_path_thread_shut(sibling_error);
2380             if (sibling_error) {
2381                 error_report("RP: Sibling indicated error %d", sibling_error);
2382                 mark_source_rp_bad(ms);
2383             }
2384             /*
2385              * We'll let the main thread deal with closing the RP
2386              * we could do a shutdown(2) on it, but we're the only user
2387              * anyway, so there's nothing gained.
2388              */
2389             goto out;
2390 
2391         case MIG_RP_MSG_PONG:
2392             tmp32 = ldl_be_p(buf);
2393             trace_source_return_path_thread_pong(tmp32);
2394             break;
2395 
2396         case MIG_RP_MSG_REQ_PAGES:
2397             start = ldq_be_p(buf);
2398             len = ldl_be_p(buf + 8);
2399             migrate_handle_rp_req_pages(ms, NULL, start, len);
2400             break;
2401 
2402         case MIG_RP_MSG_REQ_PAGES_ID:
2403             expected_len = 12 + 1; /* header + termination */
2404 
2405             if (header_len >= expected_len) {
2406                 start = ldq_be_p(buf);
2407                 len = ldl_be_p(buf + 8);
2408                 /* Now we expect an idstr */
2409                 tmp32 = buf[12]; /* Length of the following idstr */
2410                 buf[13 + tmp32] = '\0';
2411                 expected_len += tmp32;
2412             }
2413             if (header_len != expected_len) {
2414                 error_report("RP: Req_Page_id with length %d expecting %zd",
2415                         header_len, expected_len);
2416                 mark_source_rp_bad(ms);
2417                 goto out;
2418             }
2419             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
2420             break;
2421 
2422         case MIG_RP_MSG_RECV_BITMAP:
2423             if (header_len < 1) {
2424                 error_report("%s: missing block name", __func__);
2425                 mark_source_rp_bad(ms);
2426                 goto out;
2427             }
2428             /* Format: len (1B) + idstr (<255B). This ends the idstr. */
2429             buf[buf[0] + 1] = '\0';
2430             if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
2431                 mark_source_rp_bad(ms);
2432                 goto out;
2433             }
2434             break;
2435 
2436         case MIG_RP_MSG_RESUME_ACK:
2437             tmp32 = ldl_be_p(buf);
2438             if (migrate_handle_rp_resume_ack(ms, tmp32)) {
2439                 mark_source_rp_bad(ms);
2440                 goto out;
2441             }
2442             break;
2443 
2444         default:
2445             break;
2446         }
2447     }
2448 
2449 out:
2450     res = qemu_file_get_error(rp);
2451     if (res) {
2452         if (res == -EIO) {
2453             /*
2454              * Maybe there is something we can do: it looks like a
2455              * network down issue, and we pause for a recovery.
2456              */
2457             if (postcopy_pause_return_path_thread(ms)) {
2458                 /* Reload rp, reset the rest */
2459                 if (rp != ms->rp_state.from_dst_file) {
2460                     qemu_fclose(rp);
2461                     rp = ms->rp_state.from_dst_file;
2462                 }
2463                 ms->rp_state.error = false;
2464                 goto retry;
2465             }
2466         }
2467 
2468         trace_source_return_path_thread_bad_end();
2469         mark_source_rp_bad(ms);
2470     }
2471 
2472     trace_source_return_path_thread_end();
2473     ms->rp_state.from_dst_file = NULL;
2474     qemu_fclose(rp);
2475     rcu_unregister_thread();
2476     return NULL;
2477 }
2478 
2479 static int open_return_path_on_source(MigrationState *ms,
2480                                       bool create_thread)
2481 {
2482 
2483     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
2484     if (!ms->rp_state.from_dst_file) {
2485         return -1;
2486     }
2487 
2488     trace_open_return_path_on_source();
2489 
2490     if (!create_thread) {
2491         /* We're done */
2492         return 0;
2493     }
2494 
2495     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
2496                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
2497 
2498     trace_open_return_path_on_source_continue();
2499 
2500     return 0;
2501 }
2502 
2503 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
2504 static int await_return_path_close_on_source(MigrationState *ms)
2505 {
2506     /*
2507      * If this is a normal exit then the destination will send a SHUT and the
2508      * rp_thread will exit, however if there's an error we need to cause
2509      * it to exit.
2510      */
2511     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
2512         /*
2513          * shutdown(2), if we have it, will cause it to unblock if it's stuck
2514          * waiting for the destination.
2515          */
2516         qemu_file_shutdown(ms->rp_state.from_dst_file);
2517         mark_source_rp_bad(ms);
2518     }
2519     trace_await_return_path_close_on_source_joining();
2520     qemu_thread_join(&ms->rp_state.rp_thread);
2521     trace_await_return_path_close_on_source_close();
2522     return ms->rp_state.error;
2523 }
2524 
/*
 * Switch from normal iteration to postcopy
 *
 * Stops the VM, inactivates the block devices, sends the remaining
 * state to the destination (the device state wrapped in a single
 * package) and records the downtime.  The iothread lock is taken near
 * the top and released on every exit path.
 *
 * Returns non-0 on error: -1 from the fail paths, or the stream error
 * found after the package has been sent.
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    /* True while a failure must re-activate the block devices */
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;
    /*
     * With pause-before-switchover the move to POSTCOPY_ACTIVE is left
     * to migration_maybe_pause() below.
     */
    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    /* From here on a failure has to undo the inactivation */
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        if (ram_postcopy_send_discard_bitmap(ms)) {
            error_report("postcopy send discard bitmap failed");
            goto fail;
        }
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    /* Drop our own reference; bioc is still used below while fb is open */
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /* Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Lets just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    /* Past this point the block devices are not re-activated on failure */
    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /* Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    /* Downtime is measured from function entry up to this point */
    ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
    }

    return ret;

    /* Failure paths: both are entered with the iothread lock still held */
fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /* A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
2701 
2702 /**
2703  * migration_maybe_pause: Pause if required to by
2704  * migrate_pause_before_switchover called with the iothread locked
2705  * Returns: 0 on success
2706  */
2707 static int migration_maybe_pause(MigrationState *s,
2708                                  int *current_active_state,
2709                                  int new_state)
2710 {
2711     if (!migrate_pause_before_switchover()) {
2712         return 0;
2713     }
2714 
2715     /* Since leaving this state is not atomic with posting the semaphore
2716      * it's possible that someone could have issued multiple migrate_continue
2717      * and the semaphore is incorrectly positive at this point;
2718      * the docs say it's undefined to reinit a semaphore that's already
2719      * init'd, so use timedwait to eat up any existing posts.
2720      */
2721     while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
2722         /* This block intentionally left blank */
2723     }
2724 
2725     qemu_mutex_unlock_iothread();
2726     migrate_set_state(&s->state, *current_active_state,
2727                       MIGRATION_STATUS_PRE_SWITCHOVER);
2728     qemu_sem_wait(&s->pause_sem);
2729     migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
2730                       new_state);
2731     *current_active_state = new_state;
2732     qemu_mutex_lock_iothread();
2733 
2734     return s->state == new_state ? 0 : -EINVAL;
2735 }
2736 
2737 /**
2738  * migration_completion: Used by migration_thread when there's not much left.
2739  *   The caller 'breaks' the loop when this returns.
2740  *
2741  * @s: Current migration state
2742  */
2743 static void migration_completion(MigrationState *s)
2744 {
2745     int ret;
2746     int current_active_state = s->state;
2747 
2748     if (s->state == MIGRATION_STATUS_ACTIVE) {
2749         qemu_mutex_lock_iothread();
2750         s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2751         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
2752         s->vm_was_running = runstate_is_running();
2753         ret = global_state_store();
2754 
2755         if (!ret) {
2756             bool inactivate = !migrate_colo_enabled();
2757             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
2758             if (ret >= 0) {
2759                 ret = migration_maybe_pause(s, &current_active_state,
2760                                             MIGRATION_STATUS_DEVICE);
2761             }
2762             if (ret >= 0) {
2763                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
2764                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
2765                                                          inactivate);
2766             }
2767             if (inactivate && ret >= 0) {
2768                 s->block_inactive = true;
2769             }
2770         }
2771         qemu_mutex_unlock_iothread();
2772 
2773         if (ret < 0) {
2774             goto fail;
2775         }
2776     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2777         trace_migration_completion_postcopy_end();
2778 
2779         qemu_savevm_state_complete_postcopy(s->to_dst_file);
2780         trace_migration_completion_postcopy_end_after_complete();
2781     }
2782 
2783     /*
2784      * If rp was opened we must clean up the thread before
2785      * cleaning everything else up (since if there are no failures
2786      * it will wait for the destination to send it's status in
2787      * a SHUT command).
2788      */
2789     if (s->rp_state.from_dst_file) {
2790         int rp_error;
2791         trace_migration_return_path_end_before();
2792         rp_error = await_return_path_close_on_source(s);
2793         trace_migration_return_path_end_after(rp_error);
2794         if (rp_error) {
2795             goto fail_invalidate;
2796         }
2797     }
2798 
2799     if (qemu_file_get_error(s->to_dst_file)) {
2800         trace_migration_completion_file_err();
2801         goto fail_invalidate;
2802     }
2803 
2804     if (!migrate_colo_enabled()) {
2805         migrate_set_state(&s->state, current_active_state,
2806                           MIGRATION_STATUS_COMPLETED);
2807     }
2808 
2809     return;
2810 
2811 fail_invalidate:
2812     /* If not doing postcopy, vm_start() will be called: let's regain
2813      * control on images.
2814      */
2815     if (s->state == MIGRATION_STATUS_ACTIVE ||
2816         s->state == MIGRATION_STATUS_DEVICE) {
2817         Error *local_err = NULL;
2818 
2819         qemu_mutex_lock_iothread();
2820         bdrv_invalidate_cache_all(&local_err);
2821         if (local_err) {
2822             error_report_err(local_err);
2823         } else {
2824             s->block_inactive = false;
2825         }
2826         qemu_mutex_unlock_iothread();
2827     }
2828 
2829 fail:
2830     migrate_set_state(&s->state, current_active_state,
2831                       MIGRATION_STATUS_FAILED);
2832 }
2833 
2834 bool migrate_colo_enabled(void)
2835 {
2836     MigrationState *s = migrate_get_current();
2837     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
2838 }
2839 
/* Outcome of the migration thread's per-iteration error check */
typedef enum MigThrError {
    MIG_THR_ERR_NONE = 0,       /* No error detected */
    MIG_THR_ERR_RECOVERED = 1,  /* Error detected, but resumed successfully */
    MIG_THR_ERR_FATAL = 2,      /* Fatal error detected, need to exit */
} MigThrError;
2848 
2849 static int postcopy_resume_handshake(MigrationState *s)
2850 {
2851     qemu_savevm_send_postcopy_resume(s->to_dst_file);
2852 
2853     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2854         qemu_sem_wait(&s->rp_state.rp_sem);
2855     }
2856 
2857     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2858         return 0;
2859     }
2860 
2861     return -1;
2862 }
2863 
2864 /* Return zero if success, or <0 for error */
2865 static int postcopy_do_resume(MigrationState *s)
2866 {
2867     int ret;
2868 
2869     /*
2870      * Call all the resume_prepare() hooks, so that modules can be
2871      * ready for the migration resume.
2872      */
2873     ret = qemu_savevm_state_resume_prepare(s);
2874     if (ret) {
2875         error_report("%s: resume_prepare() failure detected: %d",
2876                      __func__, ret);
2877         return ret;
2878     }
2879 
2880     /*
2881      * Last handshake with destination on the resume (destination will
2882      * switch to postcopy-active afterwards)
2883      */
2884     ret = postcopy_resume_handshake(s);
2885     if (ret) {
2886         error_report("%s: handshake failed: %d", __func__, ret);
2887         return ret;
2888     }
2889 
2890     return 0;
2891 }
2892 
2893 /*
2894  * We don't return until we are in a safe state to continue current
2895  * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
2896  * MIG_THR_ERR_FATAL if unrecovery failure happened.
2897  */
2898 static MigThrError postcopy_pause(MigrationState *s)
2899 {
2900     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
2901 
2902     while (true) {
2903         QEMUFile *file;
2904 
2905         migrate_set_state(&s->state, s->state,
2906                           MIGRATION_STATUS_POSTCOPY_PAUSED);
2907 
2908         /* Current channel is possibly broken. Release it. */
2909         assert(s->to_dst_file);
2910         qemu_mutex_lock(&s->qemu_file_lock);
2911         file = s->to_dst_file;
2912         s->to_dst_file = NULL;
2913         qemu_mutex_unlock(&s->qemu_file_lock);
2914 
2915         qemu_file_shutdown(file);
2916         qemu_fclose(file);
2917 
2918         error_report("Detected IO failure for postcopy. "
2919                      "Migration paused.");
2920 
2921         /*
2922          * We wait until things fixed up. Then someone will setup the
2923          * status back for us.
2924          */
2925         while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
2926             qemu_sem_wait(&s->postcopy_pause_sem);
2927         }
2928 
2929         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2930             /* Woken up by a recover procedure. Give it a shot */
2931 
2932             /*
2933              * Firstly, let's wake up the return path now, with a new
2934              * return path channel.
2935              */
2936             qemu_sem_post(&s->postcopy_pause_rp_sem);
2937 
2938             /* Do the resume logic */
2939             if (postcopy_do_resume(s) == 0) {
2940                 /* Let's continue! */
2941                 trace_postcopy_pause_continued();
2942                 return MIG_THR_ERR_RECOVERED;
2943             } else {
2944                 /*
2945                  * Something wrong happened during the recovery, let's
2946                  * pause again. Pause is always better than throwing
2947                  * data away.
2948                  */
2949                 continue;
2950             }
2951         } else {
2952             /* This is not right... Time to quit. */
2953             return MIG_THR_ERR_FATAL;
2954         }
2955     }
2956 }
2957 
2958 static MigThrError migration_detect_error(MigrationState *s)
2959 {
2960     int ret;
2961     int state = s->state;
2962 
2963     if (state == MIGRATION_STATUS_CANCELLING ||
2964         state == MIGRATION_STATUS_CANCELLED) {
2965         /* End the migration, but don't set the state to failed */
2966         return MIG_THR_ERR_FATAL;
2967     }
2968 
2969     /* Try to detect any file errors */
2970     ret = qemu_file_get_error(s->to_dst_file);
2971 
2972     if (!ret) {
2973         /* Everything is fine */
2974         return MIG_THR_ERR_NONE;
2975     }
2976 
2977     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
2978         /*
2979          * For postcopy, we allow the network to be down for a
2980          * while. After that, it can be continued by a
2981          * recovery phase.
2982          */
2983         return postcopy_pause(s);
2984     } else {
2985         /*
2986          * For precopy (or postcopy with error outside IO), we fail
2987          * with no time.
2988          */
2989         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
2990         trace_migration_thread_file_err();
2991 
2992         /* Time to stop the migration, now. */
2993         return MIG_THR_ERR_FATAL;
2994     }
2995 }
2996 
2997 /* How many bytes have we transferred since the beggining of the migration */
2998 static uint64_t migration_total_bytes(MigrationState *s)
2999 {
3000     return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes;
3001 }
3002 
3003 static void migration_calculate_complete(MigrationState *s)
3004 {
3005     uint64_t bytes = migration_total_bytes(s);
3006     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3007     int64_t transfer_time;
3008 
3009     s->total_time = end_time - s->start_time;
3010     if (!s->downtime) {
3011         /*
3012          * It's still not set, so we are precopy migration.  For
3013          * postcopy, downtime is calculated during postcopy_start().
3014          */
3015         s->downtime = end_time - s->downtime_start;
3016     }
3017 
3018     transfer_time = s->total_time - s->setup_time;
3019     if (transfer_time) {
3020         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3021     }
3022 }
3023 
3024 static void migration_update_counters(MigrationState *s,
3025                                       int64_t current_time)
3026 {
3027     uint64_t transferred, transferred_pages, time_spent;
3028     uint64_t current_bytes; /* bytes transferred since the beginning */
3029     double bandwidth;
3030 
3031     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3032         return;
3033     }
3034 
3035     current_bytes = migration_total_bytes(s);
3036     transferred = current_bytes - s->iteration_initial_bytes;
3037     time_spent = current_time - s->iteration_start_time;
3038     bandwidth = (double)transferred / time_spent;
3039     s->threshold_size = bandwidth * s->parameters.downtime_limit;
3040 
3041     s->mbps = (((double) transferred * 8.0) /
3042                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3043 
3044     transferred_pages = ram_get_total_transferred_pages() -
3045                             s->iteration_initial_pages;
3046     s->pages_per_second = (double) transferred_pages /
3047                              (((double) time_spent / 1000.0));
3048 
3049     /*
3050      * if we haven't sent anything, we don't want to
3051      * recalculate. 10000 is a small enough number for our purposes
3052      */
3053     if (ram_counters.dirty_pages_rate && transferred > 10000) {
3054         s->expected_downtime = ram_counters.remaining / bandwidth;
3055     }
3056 
3057     qemu_file_reset_rate_limit(s->to_dst_file);
3058 
3059     s->iteration_start_time = current_time;
3060     s->iteration_initial_bytes = current_bytes;
3061     s->iteration_initial_pages = ram_get_total_transferred_pages();
3062 
3063     trace_migrate_transferred(transferred, time_spent,
3064                               bandwidth, s->threshold_size);
3065 }
3066 
/* What the migration thread should do after one iteration step */
typedef enum {
    /* Resume current iteration */
    MIG_ITERATE_RESUME,
    /* Skip current iteration */
    MIG_ITERATE_SKIP,
    /* Break the loop */
    MIG_ITERATE_BREAK,
} MigIterateState;
3073 
3074 /*
3075  * Return true if continue to the next iteration directly, false
3076  * otherwise.
3077  */
3078 static MigIterateState migration_iteration_run(MigrationState *s)
3079 {
3080     uint64_t pending_size, pend_pre, pend_compat, pend_post;
3081     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3082 
3083     qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
3084                               &pend_compat, &pend_post);
3085     pending_size = pend_pre + pend_compat + pend_post;
3086 
3087     trace_migrate_pending(pending_size, s->threshold_size,
3088                           pend_pre, pend_compat, pend_post);
3089 
3090     if (pending_size && pending_size >= s->threshold_size) {
3091         /* Still a significant amount to transfer */
3092         if (migrate_postcopy() && !in_postcopy &&
3093             pend_pre <= s->threshold_size &&
3094             atomic_read(&s->start_postcopy)) {
3095             if (postcopy_start(s)) {
3096                 error_report("%s: postcopy failed to start", __func__);
3097             }
3098             return MIG_ITERATE_SKIP;
3099         }
3100         /* Just another iteration step */
3101         qemu_savevm_state_iterate(s->to_dst_file,
3102             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
3103     } else {
3104         trace_migration_thread_low_pending(pending_size);
3105         migration_completion(s);
3106         return MIG_ITERATE_BREAK;
3107     }
3108 
3109     return MIG_ITERATE_RESUME;
3110 }
3111 
3112 static void migration_iteration_finish(MigrationState *s)
3113 {
3114     /* If we enabled cpu throttling for auto-converge, turn it off. */
3115     cpu_throttle_stop();
3116 
3117     qemu_mutex_lock_iothread();
3118     switch (s->state) {
3119     case MIGRATION_STATUS_COMPLETED:
3120         migration_calculate_complete(s);
3121         runstate_set(RUN_STATE_POSTMIGRATE);
3122         break;
3123 
3124     case MIGRATION_STATUS_ACTIVE:
3125         /*
3126          * We should really assert here, but since it's during
3127          * migration, let's try to reduce the usage of assertions.
3128          */
3129         if (!migrate_colo_enabled()) {
3130             error_report("%s: critical error: calling COLO code without "
3131                          "COLO enabled", __func__);
3132         }
3133         migrate_start_colo_process(s);
3134         /*
3135          * Fixme: we will run VM in COLO no matter its old running state.
3136          * After exited COLO, we will keep running.
3137          */
3138         s->vm_was_running = true;
3139         /* Fallthrough */
3140     case MIGRATION_STATUS_FAILED:
3141     case MIGRATION_STATUS_CANCELLED:
3142     case MIGRATION_STATUS_CANCELLING:
3143         if (s->vm_was_running) {
3144             vm_start();
3145         } else {
3146             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3147                 runstate_set(RUN_STATE_POSTMIGRATE);
3148             }
3149         }
3150         break;
3151 
3152     default:
3153         /* Should not reach here, but if so, forgive the VM. */
3154         error_report("%s: Unknown ending state %d", __func__, s->state);
3155         break;
3156     }
3157     qemu_bh_schedule(s->cleanup_bh);
3158     qemu_mutex_unlock_iothread();
3159 }
3160 
3161 void migration_make_urgent_request(void)
3162 {
3163     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3164 }
3165 
3166 void migration_consume_urgent_request(void)
3167 {
3168     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3169 }
3170 
3171 /*
3172  * Master migration thread on the source VM.
3173  * It drives the migration and pumps the data down the outgoing channel.
3174  */
3175 static void *migration_thread(void *opaque)
3176 {
3177     MigrationState *s = opaque;
3178     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3179     MigThrError thr_error;
3180     bool urgent = false;
3181 
3182     rcu_register_thread();
3183 
3184     object_ref(OBJECT(s));
3185     s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3186 
3187     qemu_savevm_state_header(s->to_dst_file);
3188 
3189     /*
3190      * If we opened the return path, we need to make sure dst has it
3191      * opened as well.
3192      */
3193     if (s->rp_state.from_dst_file) {
3194         /* Now tell the dest that it should open its end so it can reply */
3195         qemu_savevm_send_open_return_path(s->to_dst_file);
3196 
3197         /* And do a ping that will make stuff easier to debug */
3198         qemu_savevm_send_ping(s->to_dst_file, 1);
3199     }
3200 
3201     if (migrate_postcopy()) {
3202         /*
3203          * Tell the destination that we *might* want to do postcopy later;
3204          * if the other end can't do postcopy it should fail now, nice and
3205          * early.
3206          */
3207         qemu_savevm_send_postcopy_advise(s->to_dst_file);
3208     }
3209 
3210     if (migrate_colo_enabled()) {
3211         /* Notify migration destination that we enable COLO */
3212         qemu_savevm_send_colo_enable(s->to_dst_file);
3213     }
3214 
3215     qemu_savevm_state_setup(s->to_dst_file);
3216 
3217     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3218     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
3219                       MIGRATION_STATUS_ACTIVE);
3220 
3221     trace_migration_thread_setup_complete();
3222 
3223     while (s->state == MIGRATION_STATUS_ACTIVE ||
3224            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3225         int64_t current_time;
3226 
3227         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
3228             MigIterateState iter_state = migration_iteration_run(s);
3229             if (iter_state == MIG_ITERATE_SKIP) {
3230                 continue;
3231             } else if (iter_state == MIG_ITERATE_BREAK) {
3232                 break;
3233             }
3234         }
3235 
3236         /*
3237          * Try to detect any kind of failures, and see whether we
3238          * should stop the migration now.
3239          */
3240         thr_error = migration_detect_error(s);
3241         if (thr_error == MIG_THR_ERR_FATAL) {
3242             /* Stop migration */
3243             break;
3244         } else if (thr_error == MIG_THR_ERR_RECOVERED) {
3245             /*
3246              * Just recovered from a e.g. network failure, reset all
3247              * the local variables. This is important to avoid
3248              * breaking transferred_bytes and bandwidth calculation
3249              */
3250             s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3251             s->iteration_initial_bytes = 0;
3252         }
3253 
3254         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3255 
3256         migration_update_counters(s, current_time);
3257 
3258         urgent = false;
3259         if (qemu_file_rate_limit(s->to_dst_file)) {
3260             /* Wait for a delay to do rate limiting OR
3261              * something urgent to post the semaphore.
3262              */
3263             int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
3264             trace_migration_thread_ratelimit_pre(ms);
3265             if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
3266                 /* We were worken by one or more urgent things but
3267                  * the timedwait will have consumed one of them.
3268                  * The service routine for the urgent wake will dec
3269                  * the semaphore itself for each item it consumes,
3270                  * so add this one we just eat back.
3271                  */
3272                 qemu_sem_post(&s->rate_limit_sem);
3273                 urgent = true;
3274             }
3275             trace_migration_thread_ratelimit_post(urgent);
3276         }
3277     }
3278 
3279     trace_migration_thread_after_loop();
3280     migration_iteration_finish(s);
3281     object_unref(OBJECT(s));
3282     rcu_unregister_thread();
3283     return NULL;
3284 }
3285 
3286 void migrate_fd_connect(MigrationState *s, Error *error_in)
3287 {
3288     int64_t rate_limit;
3289     bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
3290 
3291     s->expected_downtime = s->parameters.downtime_limit;
3292     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
3293     if (error_in) {
3294         migrate_fd_error(s, error_in);
3295         migrate_fd_cleanup(s);
3296         return;
3297     }
3298 
3299     if (resume) {
3300         /* This is a resumed migration */
3301         rate_limit = INT64_MAX;
3302     } else {
3303         /* This is a fresh new migration */
3304         rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
3305 
3306         /* Notify before starting migration thread */
3307         notifier_list_notify(&migration_state_notifiers, s);
3308     }
3309 
3310     qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
3311     qemu_file_set_blocking(s->to_dst_file, true);
3312 
3313     /*
3314      * Open the return path. For postcopy, it is used exclusively. For
3315      * precopy, only if user specified "return-path" capability would
3316      * QEMU uses the return path.
3317      */
3318     if (migrate_postcopy_ram() || migrate_use_return_path()) {
3319         if (open_return_path_on_source(s, !resume)) {
3320             error_report("Unable to open return-path for postcopy");
3321             migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
3322             migrate_fd_cleanup(s);
3323             return;
3324         }
3325     }
3326 
3327     if (resume) {
3328         /* Wakeup the main migration thread to do the recovery */
3329         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
3330                           MIGRATION_STATUS_POSTCOPY_RECOVER);
3331         qemu_sem_post(&s->postcopy_pause_sem);
3332         return;
3333     }
3334 
3335     if (multifd_save_setup() != 0) {
3336         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
3337                           MIGRATION_STATUS_FAILED);
3338         migrate_fd_cleanup(s);
3339         return;
3340     }
3341     qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
3342                        QEMU_THREAD_JOINABLE);
3343     s->migration_thread_running = true;
3344 }
3345 
3346 void migration_global_dump(Monitor *mon)
3347 {
3348     MigrationState *ms = migrate_get_current();
3349 
3350     monitor_printf(mon, "globals:\n");
3351     monitor_printf(mon, "store-global-state: %s\n",
3352                    ms->store_global_state ? "on" : "off");
3353     monitor_printf(mon, "only-migratable: %s\n",
3354                    ms->only_migratable ? "on" : "off");
3355     monitor_printf(mon, "send-configuration: %s\n",
3356                    ms->send_configuration ? "on" : "off");
3357     monitor_printf(mon, "send-section-footer: %s\n",
3358                    ms->send_section_footer ? "on" : "off");
3359     monitor_printf(mon, "decompress-error-check: %s\n",
3360                    ms->decompress_error_check ? "on" : "off");
3361 }
3362 
/*
 * Boolean property @prop_name backed by MigrationState's
 * enabled_capabilities[@cap], defaulting to false.
 */
#define DEFINE_PROP_MIG_CAP(prop_name, cap)                     \
    DEFINE_PROP_BOOL(prop_name, MigrationState,                 \
                     enabled_capabilities[cap], false)
3365 
3366 static Property migration_properties[] = {
3367     DEFINE_PROP_BOOL("store-global-state", MigrationState,
3368                      store_global_state, true),
3369     DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, false),
3370     DEFINE_PROP_BOOL("send-configuration", MigrationState,
3371                      send_configuration, true),
3372     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
3373                      send_section_footer, true),
3374     DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
3375                       decompress_error_check, true),
3376 
3377     /* Migration parameters */
3378     DEFINE_PROP_UINT8("x-compress-level", MigrationState,
3379                       parameters.compress_level,
3380                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
3381     DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
3382                       parameters.compress_threads,
3383                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
3384     DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
3385                       parameters.compress_wait_thread, true),
3386     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
3387                       parameters.decompress_threads,
3388                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
3389     DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
3390                       parameters.cpu_throttle_initial,
3391                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
3392     DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
3393                       parameters.cpu_throttle_increment,
3394                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
3395     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
3396                       parameters.max_bandwidth, MAX_THROTTLE),
3397     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
3398                       parameters.downtime_limit,
3399                       DEFAULT_MIGRATE_SET_DOWNTIME),
3400     DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
3401                       parameters.x_checkpoint_delay,
3402                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
3403     DEFINE_PROP_UINT8("x-multifd-channels", MigrationState,
3404                       parameters.x_multifd_channels,
3405                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
3406     DEFINE_PROP_UINT32("x-multifd-page-count", MigrationState,
3407                       parameters.x_multifd_page_count,
3408                       DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT),
3409     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
3410                       parameters.xbzrle_cache_size,
3411                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
3412     DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
3413                       parameters.max_postcopy_bandwidth,
3414                       DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
3415     DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
3416                       parameters.max_cpu_throttle,
3417                       DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
3418     DEFINE_PROP_SIZE("announce-initial", MigrationState,
3419                       parameters.announce_initial,
3420                       DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
3421     DEFINE_PROP_SIZE("announce-max", MigrationState,
3422                       parameters.announce_max,
3423                       DEFAULT_MIGRATE_ANNOUNCE_MAX),
3424     DEFINE_PROP_SIZE("announce-rounds", MigrationState,
3425                       parameters.announce_rounds,
3426                       DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
3427     DEFINE_PROP_SIZE("announce-step", MigrationState,
3428                       parameters.announce_step,
3429                       DEFAULT_MIGRATE_ANNOUNCE_STEP),
3430 
3431     /* Migration capabilities */
3432     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
3433     DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
3434     DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
3435     DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
3436     DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
3437     DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
3438     DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
3439     DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
3440     DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
3441     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
3442     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
3443     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD),
3444 
3445     DEFINE_PROP_END_OF_LIST(),
3446 };
3447 
3448 static void migration_class_init(ObjectClass *klass, void *data)
3449 {
3450     DeviceClass *dc = DEVICE_CLASS(klass);
3451 
3452     dc->user_creatable = false;
3453     dc->props = migration_properties;
3454 }
3455 
3456 static void migration_instance_finalize(Object *obj)
3457 {
3458     MigrationState *ms = MIGRATION_OBJ(obj);
3459     MigrationParameters *params = &ms->parameters;
3460 
3461     qemu_mutex_destroy(&ms->error_mutex);
3462     qemu_mutex_destroy(&ms->qemu_file_lock);
3463     g_free(params->tls_hostname);
3464     g_free(params->tls_creds);
3465     qemu_sem_destroy(&ms->rate_limit_sem);
3466     qemu_sem_destroy(&ms->pause_sem);
3467     qemu_sem_destroy(&ms->postcopy_pause_sem);
3468     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
3469     qemu_sem_destroy(&ms->rp_state.rp_sem);
3470     error_free(ms->error);
3471 }
3472 
3473 static void migration_instance_init(Object *obj)
3474 {
3475     MigrationState *ms = MIGRATION_OBJ(obj);
3476     MigrationParameters *params = &ms->parameters;
3477 
3478     ms->state = MIGRATION_STATUS_NONE;
3479     ms->mbps = -1;
3480     ms->pages_per_second = -1;
3481     qemu_sem_init(&ms->pause_sem, 0);
3482     qemu_mutex_init(&ms->error_mutex);
3483 
3484     params->tls_hostname = g_strdup("");
3485     params->tls_creds = g_strdup("");
3486 
3487     /* Set has_* up only for parameter checks */
3488     params->has_compress_level = true;
3489     params->has_compress_threads = true;
3490     params->has_decompress_threads = true;
3491     params->has_cpu_throttle_initial = true;
3492     params->has_cpu_throttle_increment = true;
3493     params->has_max_bandwidth = true;
3494     params->has_downtime_limit = true;
3495     params->has_x_checkpoint_delay = true;
3496     params->has_block_incremental = true;
3497     params->has_x_multifd_channels = true;
3498     params->has_x_multifd_page_count = true;
3499     params->has_xbzrle_cache_size = true;
3500     params->has_max_postcopy_bandwidth = true;
3501     params->has_max_cpu_throttle = true;
3502     params->has_announce_initial = true;
3503     params->has_announce_max = true;
3504     params->has_announce_rounds = true;
3505     params->has_announce_step = true;
3506 
3507     qemu_sem_init(&ms->postcopy_pause_sem, 0);
3508     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
3509     qemu_sem_init(&ms->rp_state.rp_sem, 0);
3510     qemu_sem_init(&ms->rate_limit_sem, 0);
3511     qemu_mutex_init(&ms->qemu_file_lock);
3512 }
3513 
3514 /*
3515  * Return true if check pass, false otherwise. Error will be put
3516  * inside errp if provided.
3517  */
3518 static bool migration_object_check(MigrationState *ms, Error **errp)
3519 {
3520     MigrationCapabilityStatusList *head = NULL;
3521     /* Assuming all off */
3522     bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
3523     int i;
3524 
3525     if (!migrate_params_check(&ms->parameters, errp)) {
3526         return false;
3527     }
3528 
3529     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
3530         if (ms->enabled_capabilities[i]) {
3531             head = migrate_cap_add(head, i, true);
3532         }
3533     }
3534 
3535     ret = migrate_caps_check(cap_list, head, errp);
3536 
3537     /* It works with head == NULL */
3538     qapi_free_MigrationCapabilityStatusList(head);
3539 
3540     return ret;
3541 }
3542 
3543 static const TypeInfo migration_type = {
3544     .name = TYPE_MIGRATION,
3545     /*
3546      * NOTE: TYPE_MIGRATION is not really a device, as the object is
3547      * not created using qdev_create(), it is not attached to the qdev
3548      * device tree, and it is never realized.
3549      *
3550      * TODO: Make this TYPE_OBJECT once QOM provides something like
3551      * TYPE_DEVICE's "-global" properties.
3552      */
3553     .parent = TYPE_DEVICE,
3554     .class_init = migration_class_init,
3555     .class_size = sizeof(MigrationClass),
3556     .instance_size = sizeof(MigrationState),
3557     .instance_init = migration_instance_init,
3558     .instance_finalize = migration_instance_finalize,
3559 };
3560 
3561 static void register_migration_types(void)
3562 {
3563     type_register_static(&migration_type);
3564 }
3565 
3566 type_init(register_migration_types);
3567