xref: /qemu/migration/colo.c (revision 2cc637f1)
135a6ed4fSzhanghailiang /*
235a6ed4fSzhanghailiang  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
335a6ed4fSzhanghailiang  * (a.k.a. Fault Tolerance or Continuous Replication)
435a6ed4fSzhanghailiang  *
535a6ed4fSzhanghailiang  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
635a6ed4fSzhanghailiang  * Copyright (c) 2016 FUJITSU LIMITED
735a6ed4fSzhanghailiang  * Copyright (c) 2016 Intel Corporation
835a6ed4fSzhanghailiang  *
935a6ed4fSzhanghailiang  * This work is licensed under the terms of the GNU GPL, version 2 or
1035a6ed4fSzhanghailiang  * later.  See the COPYING file in the top-level directory.
1135a6ed4fSzhanghailiang  */
1235a6ed4fSzhanghailiang 
1335a6ed4fSzhanghailiang #include "qemu/osdep.h"
140b827d5eSzhanghailiang #include "sysemu/sysemu.h"
15e688df6bSMarkus Armbruster #include "qapi/error.h"
169af23989SMarkus Armbruster #include "qapi/qapi-commands-migration.h"
176666c96aSJuan Quintela #include "migration.h"
1808a0aee1SJuan Quintela #include "qemu-file.h"
1920a519a0SJuan Quintela #include "savevm.h"
2035a6ed4fSzhanghailiang #include "migration/colo.h"
212c9e6fecSJuan Quintela #include "block.h"
22a91246c9Szhanghailiang #include "io/channel-buffer.h"
230b827d5eSzhanghailiang #include "trace.h"
2456ba83d2Szhanghailiang #include "qemu/error-report.h"
25db725815SMarkus Armbruster #include "qemu/main-loop.h"
26d4842052SMarkus Armbruster #include "qemu/rcu.h"
27d89e666eSzhanghailiang #include "migration/failover.h"
280393031aSzhanghailiang #include "migration/ram.h"
29b0262955SPaolo Bonzini #include "block/replication.h"
30131b2153SZhang Chen #include "net/colo-compare.h"
31131b2153SZhang Chen #include "net/colo.h"
328e48ac95SZhang Chen #include "block/block.h"
339ecff6d6Szhanghailiang #include "qapi/qapi-events-migration.h"
343f6df99dSZhang Chen #include "sysemu/cpus.h"
3554d31236SMarkus Armbruster #include "sysemu/runstate.h"
367b343530Szhanghailiang #include "net/filter.h"
371f0776f1SJuan Quintela #include "options.h"
3835a6ed4fSzhanghailiang 
39a8664ba5Szhanghailiang static bool vmstate_loading;
40131b2153SZhang Chen static Notifier packets_compare_notifier;
41a8664ba5Szhanghailiang 
425ed0decaSZhang Chen /* User need to know colo mode after COLO failover */
435ed0decaSZhang Chen static COLOMode last_colo_mode;
445ed0decaSZhang Chen 
45a91246c9Szhanghailiang #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
46a91246c9Szhanghailiang 
migration_in_colo_state(void)470b827d5eSzhanghailiang bool migration_in_colo_state(void)
480b827d5eSzhanghailiang {
490b827d5eSzhanghailiang     MigrationState *s = migrate_get_current();
500b827d5eSzhanghailiang 
510b827d5eSzhanghailiang     return (s->state == MIGRATION_STATUS_COLO);
520b827d5eSzhanghailiang }
530b827d5eSzhanghailiang 
migration_incoming_in_colo_state(void)5425d0c16fSzhanghailiang bool migration_incoming_in_colo_state(void)
5525d0c16fSzhanghailiang {
5625d0c16fSzhanghailiang     MigrationIncomingState *mis = migration_incoming_get_current();
5725d0c16fSzhanghailiang 
5825d0c16fSzhanghailiang     return mis && (mis->state == MIGRATION_STATUS_COLO);
5925d0c16fSzhanghailiang }
6025d0c16fSzhanghailiang 
colo_runstate_is_stopped(void)61b3f7f0c5Szhanghailiang static bool colo_runstate_is_stopped(void)
62b3f7f0c5Szhanghailiang {
63b3f7f0c5Szhanghailiang     return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
64b3f7f0c5Szhanghailiang }
65b3f7f0c5Szhanghailiang 
colo_checkpoint_notify(void)667395127fSSteve Sistare static void colo_checkpoint_notify(void)
674332ffcdSVladimir Sementsov-Ogievskiy {
687395127fSSteve Sistare     MigrationState *s = migrate_get_current();
694332ffcdSVladimir Sementsov-Ogievskiy     int64_t next_notify_time;
704332ffcdSVladimir Sementsov-Ogievskiy 
714332ffcdSVladimir Sementsov-Ogievskiy     qemu_event_set(&s->colo_checkpoint_event);
724332ffcdSVladimir Sementsov-Ogievskiy     s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
734332ffcdSVladimir Sementsov-Ogievskiy     next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay();
744332ffcdSVladimir Sementsov-Ogievskiy     timer_mod(s->colo_delay_timer, next_notify_time);
754332ffcdSVladimir Sementsov-Ogievskiy }
764332ffcdSVladimir Sementsov-Ogievskiy 
/*
 * Timer-callback shaped wrapper around colo_checkpoint_notify().
 * @opaque is not used.
 */
static void colo_checkpoint_notify_timer(void *opaque)
{
    colo_checkpoint_notify();
}
817395127fSSteve Sistare 
/*
 * Trigger a checkpoint notification (which also re-arms the delay
 * timer), but only while the outgoing migration is in the COLO state.
 */
void colo_checkpoint_delay_set(void)
{
    if (!migration_in_colo_state()) {
        return;
    }
    colo_checkpoint_notify();
}
884332ffcdSVladimir Sementsov-Ogievskiy 
secondary_vm_do_failover(void)899d2db376Szhanghailiang static void secondary_vm_do_failover(void)
909d2db376Szhanghailiang {
913ebb9c4fSZhang Chen /* COLO needs enable block-replication */
929d2db376Szhanghailiang     int old_state;
939d2db376Szhanghailiang     MigrationIncomingState *mis = migration_incoming_get_current();
948e48ac95SZhang Chen     Error *local_err = NULL;
959d2db376Szhanghailiang 
96a8664ba5Szhanghailiang     /* Can not do failover during the process of VM's loading VMstate, Or
97a8664ba5Szhanghailiang      * it will break the secondary VM.
98a8664ba5Szhanghailiang      */
99a8664ba5Szhanghailiang     if (vmstate_loading) {
100a8664ba5Szhanghailiang         old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
101a8664ba5Szhanghailiang                         FAILOVER_STATUS_RELAUNCH);
102a8664ba5Szhanghailiang         if (old_state != FAILOVER_STATUS_ACTIVE) {
103a8664ba5Szhanghailiang             error_report("Unknown error while do failover for secondary VM,"
104977c736fSMarkus Armbruster                          "old_state: %s", FailoverStatus_str(old_state));
105a8664ba5Szhanghailiang         }
106a8664ba5Szhanghailiang         return;
107a8664ba5Szhanghailiang     }
108a8664ba5Szhanghailiang 
1099d2db376Szhanghailiang     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
1109d2db376Szhanghailiang                       MIGRATION_STATUS_COMPLETED);
1119d2db376Szhanghailiang 
1128e48ac95SZhang Chen     replication_stop_all(true, &local_err);
1138e48ac95SZhang Chen     if (local_err) {
1148e48ac95SZhang Chen         error_report_err(local_err);
11527d07fcfSVladimir Sementsov-Ogievskiy         local_err = NULL;
1168e48ac95SZhang Chen     }
1178e48ac95SZhang Chen 
1187b343530Szhanghailiang     /* Notify all filters of all NIC to do checkpoint */
1197b343530Szhanghailiang     colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
1207b343530Szhanghailiang     if (local_err) {
1217b343530Szhanghailiang         error_report_err(local_err);
1227b343530Szhanghailiang     }
1237b343530Szhanghailiang 
1249d2db376Szhanghailiang     if (!autostart) {
1259d2db376Szhanghailiang         error_report("\"-S\" qemu option will be ignored in secondary side");
1269d2db376Szhanghailiang         /* recover runstate to normal migration finish state */
1279d2db376Szhanghailiang         autostart = true;
1289d2db376Szhanghailiang     }
129c937b9a6Szhanghailiang     /*
130c937b9a6Szhanghailiang      * Make sure COLO incoming thread not block in recv or send,
131c937b9a6Szhanghailiang      * If mis->from_src_file and mis->to_src_file use the same fd,
132c937b9a6Szhanghailiang      * The second shutdown() will return -1, we ignore this value,
133c937b9a6Szhanghailiang      * It is harmless.
134c937b9a6Szhanghailiang      */
135c937b9a6Szhanghailiang     if (mis->from_src_file) {
136c937b9a6Szhanghailiang         qemu_file_shutdown(mis->from_src_file);
137c937b9a6Szhanghailiang     }
138c937b9a6Szhanghailiang     if (mis->to_src_file) {
139c937b9a6Szhanghailiang         qemu_file_shutdown(mis->to_src_file);
140c937b9a6Szhanghailiang     }
1419d2db376Szhanghailiang 
1429d2db376Szhanghailiang     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
1439d2db376Szhanghailiang                                    FAILOVER_STATUS_COMPLETED);
1449d2db376Szhanghailiang     if (old_state != FAILOVER_STATUS_ACTIVE) {
1459d2db376Szhanghailiang         error_report("Incorrect state (%s) while doing failover for "
146977c736fSMarkus Armbruster                      "secondary VM", FailoverStatus_str(old_state));
1479d2db376Szhanghailiang         return;
1489d2db376Szhanghailiang     }
149c937b9a6Szhanghailiang     /* Notify COLO incoming thread that failover work is finished */
150c937b9a6Szhanghailiang     qemu_sem_post(&mis->colo_incoming_sem);
1511fe6ab26SZhang Chen 
1529d2db376Szhanghailiang     /* For Secondary VM, jump to incoming co */
153dd42ce24SVladimir Sementsov-Ogievskiy     if (mis->colo_incoming_co) {
154dd42ce24SVladimir Sementsov-Ogievskiy         qemu_coroutine_enter(mis->colo_incoming_co);
1559d2db376Szhanghailiang     }
1569d2db376Szhanghailiang }
1579d2db376Szhanghailiang 
primary_vm_do_failover(void)158b3f7f0c5Szhanghailiang static void primary_vm_do_failover(void)
159b3f7f0c5Szhanghailiang {
160b3f7f0c5Szhanghailiang     MigrationState *s = migrate_get_current();
161b3f7f0c5Szhanghailiang     int old_state;
1628e48ac95SZhang Chen     Error *local_err = NULL;
163b3f7f0c5Szhanghailiang 
164b3f7f0c5Szhanghailiang     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
165b3f7f0c5Szhanghailiang                       MIGRATION_STATUS_COMPLETED);
1662518aec1Szhanghailiang     /*
1672518aec1Szhanghailiang      * kick COLO thread which might wait at
1682518aec1Szhanghailiang      * qemu_sem_wait(&s->colo_checkpoint_sem).
1692518aec1Szhanghailiang      */
1707395127fSSteve Sistare     colo_checkpoint_notify();
171b3f7f0c5Szhanghailiang 
172c937b9a6Szhanghailiang     /*
173c937b9a6Szhanghailiang      * Wake up COLO thread which may blocked in recv() or send(),
174c937b9a6Szhanghailiang      * The s->rp_state.from_dst_file and s->to_dst_file may use the
175c937b9a6Szhanghailiang      * same fd, but we still shutdown the fd for twice, it is harmless.
176c937b9a6Szhanghailiang      */
177c937b9a6Szhanghailiang     if (s->to_dst_file) {
178c937b9a6Szhanghailiang         qemu_file_shutdown(s->to_dst_file);
179c937b9a6Szhanghailiang     }
180c937b9a6Szhanghailiang     if (s->rp_state.from_dst_file) {
181c937b9a6Szhanghailiang         qemu_file_shutdown(s->rp_state.from_dst_file);
182c937b9a6Szhanghailiang     }
183c937b9a6Szhanghailiang 
184b3f7f0c5Szhanghailiang     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
185b3f7f0c5Szhanghailiang                                    FAILOVER_STATUS_COMPLETED);
186b3f7f0c5Szhanghailiang     if (old_state != FAILOVER_STATUS_ACTIVE) {
187b3f7f0c5Szhanghailiang         error_report("Incorrect state (%s) while doing failover for Primary VM",
188977c736fSMarkus Armbruster                      FailoverStatus_str(old_state));
189b3f7f0c5Szhanghailiang         return;
190b3f7f0c5Szhanghailiang     }
1918e48ac95SZhang Chen 
1928e48ac95SZhang Chen     replication_stop_all(true, &local_err);
1938e48ac95SZhang Chen     if (local_err) {
1948e48ac95SZhang Chen         error_report_err(local_err);
1958e48ac95SZhang Chen         local_err = NULL;
1968e48ac95SZhang Chen     }
1978e48ac95SZhang Chen 
198c937b9a6Szhanghailiang     /* Notify COLO thread that failover work is finished */
199c937b9a6Szhanghailiang     qemu_sem_post(&s->colo_exit_sem);
200b3f7f0c5Szhanghailiang }
201b3f7f0c5Szhanghailiang 
get_colo_mode(void)202aad555c2SZhang Chen COLOMode get_colo_mode(void)
203aad555c2SZhang Chen {
204aad555c2SZhang Chen     if (migration_in_colo_state()) {
205aad555c2SZhang Chen         return COLO_MODE_PRIMARY;
206aad555c2SZhang Chen     } else if (migration_incoming_in_colo_state()) {
207aad555c2SZhang Chen         return COLO_MODE_SECONDARY;
208aad555c2SZhang Chen     } else {
20941b6b779SZhang Chen         return COLO_MODE_NONE;
210aad555c2SZhang Chen     }
211aad555c2SZhang Chen }
212aad555c2SZhang Chen 
colo_do_failover(void)213c0913d1dSZhang Chen void colo_do_failover(void)
214b3f7f0c5Szhanghailiang {
215b3f7f0c5Szhanghailiang     /* Make sure VM stopped while failover happened. */
216b3f7f0c5Szhanghailiang     if (!colo_runstate_is_stopped()) {
217b3f7f0c5Szhanghailiang         vm_stop_force_state(RUN_STATE_COLO);
218b3f7f0c5Szhanghailiang     }
219b3f7f0c5Szhanghailiang 
2202b9f6bf3SRao, Lei     switch (last_colo_mode = get_colo_mode()) {
22182cd368cSZhang Chen     case COLO_MODE_PRIMARY:
222b3f7f0c5Szhanghailiang         primary_vm_do_failover();
22382cd368cSZhang Chen         break;
22482cd368cSZhang Chen     case COLO_MODE_SECONDARY:
2259d2db376Szhanghailiang         secondary_vm_do_failover();
22682cd368cSZhang Chen         break;
22782cd368cSZhang Chen     default:
22882cd368cSZhang Chen         error_report("colo_do_failover failed because the colo mode"
22982cd368cSZhang Chen                      " could not be obtained");
230b3f7f0c5Szhanghailiang     }
231b3f7f0c5Szhanghailiang }
232b3f7f0c5Szhanghailiang 
qmp_xen_set_replication(bool enable,bool primary,bool has_failover,bool failover,Error ** errp)2332c9639ecSZhang Chen void qmp_xen_set_replication(bool enable, bool primary,
2342c9639ecSZhang Chen                              bool has_failover, bool failover,
2352c9639ecSZhang Chen                              Error **errp)
2362c9639ecSZhang Chen {
2372c9639ecSZhang Chen     ReplicationMode mode = primary ?
2382c9639ecSZhang Chen                            REPLICATION_MODE_PRIMARY :
2392c9639ecSZhang Chen                            REPLICATION_MODE_SECONDARY;
2402c9639ecSZhang Chen 
2412c9639ecSZhang Chen     if (has_failover && enable) {
2422c9639ecSZhang Chen         error_setg(errp, "Parameter 'failover' is only for"
2432c9639ecSZhang Chen                    " stopping replication");
2442c9639ecSZhang Chen         return;
2452c9639ecSZhang Chen     }
2462c9639ecSZhang Chen 
2472c9639ecSZhang Chen     if (enable) {
2482c9639ecSZhang Chen         replication_start_all(mode, errp);
2492c9639ecSZhang Chen     } else {
2502c9639ecSZhang Chen         if (!has_failover) {
2512c9639ecSZhang Chen             failover = NULL;
2522c9639ecSZhang Chen         }
2532c9639ecSZhang Chen         replication_stop_all(failover, failover ? NULL : errp);
2542c9639ecSZhang Chen     }
2552c9639ecSZhang Chen }
2562c9639ecSZhang Chen 
qmp_query_xen_replication_status(Error ** errp)257daa33c52SZhang Chen ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
258daa33c52SZhang Chen {
259daa33c52SZhang Chen     Error *err = NULL;
260daa33c52SZhang Chen     ReplicationStatus *s = g_new0(ReplicationStatus, 1);
261daa33c52SZhang Chen 
262daa33c52SZhang Chen     replication_get_error_all(&err);
263daa33c52SZhang Chen     if (err) {
264daa33c52SZhang Chen         s->error = true;
265daa33c52SZhang Chen         s->desc = g_strdup(error_get_pretty(err));
266daa33c52SZhang Chen     } else {
267daa33c52SZhang Chen         s->error = false;
268daa33c52SZhang Chen     }
269daa33c52SZhang Chen 
270daa33c52SZhang Chen     error_free(err);
271daa33c52SZhang Chen     return s;
272daa33c52SZhang Chen }
273daa33c52SZhang Chen 
qmp_xen_colo_do_checkpoint(Error ** errp)274daa33c52SZhang Chen void qmp_xen_colo_do_checkpoint(Error **errp)
275daa33c52SZhang Chen {
276735527e1SMarkus Armbruster     Error *err = NULL;
277735527e1SMarkus Armbruster 
278735527e1SMarkus Armbruster     replication_do_checkpoint_all(&err);
279735527e1SMarkus Armbruster     if (err) {
280735527e1SMarkus Armbruster         error_propagate(errp, err);
281735527e1SMarkus Armbruster         return;
282735527e1SMarkus Armbruster     }
2830e8818f0SZhang Chen     /* Notify all filters of all NIC to do checkpoint */
2840e8818f0SZhang Chen     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
285daa33c52SZhang Chen }
286daa33c52SZhang Chen 
qmp_query_colo_status(Error ** errp)287f56c0065SZhang Chen COLOStatus *qmp_query_colo_status(Error **errp)
288f56c0065SZhang Chen {
289f56c0065SZhang Chen     COLOStatus *s = g_new0(COLOStatus, 1);
290f56c0065SZhang Chen 
291f56c0065SZhang Chen     s->mode = get_colo_mode();
2925ed0decaSZhang Chen     s->last_mode = last_colo_mode;
293f56c0065SZhang Chen 
294f56c0065SZhang Chen     switch (failover_get_state()) {
295f56c0065SZhang Chen     case FAILOVER_STATUS_NONE:
296f56c0065SZhang Chen         s->reason = COLO_EXIT_REASON_NONE;
297f56c0065SZhang Chen         break;
2981fe6ab26SZhang Chen     case FAILOVER_STATUS_COMPLETED:
299f56c0065SZhang Chen         s->reason = COLO_EXIT_REASON_REQUEST;
300f56c0065SZhang Chen         break;
301f56c0065SZhang Chen     default:
3023a43ac47SZhang Chen         if (migration_in_colo_state()) {
3033a43ac47SZhang Chen             s->reason = COLO_EXIT_REASON_PROCESSING;
3043a43ac47SZhang Chen         } else {
305f56c0065SZhang Chen             s->reason = COLO_EXIT_REASON_ERROR;
306f56c0065SZhang Chen         }
3073a43ac47SZhang Chen     }
308f56c0065SZhang Chen 
309f56c0065SZhang Chen     return s;
310f56c0065SZhang Chen }
311f56c0065SZhang Chen 
colo_send_message(QEMUFile * f,COLOMessage msg,Error ** errp)3124f97558eSzhanghailiang static void colo_send_message(QEMUFile *f, COLOMessage msg,
3134f97558eSzhanghailiang                               Error **errp)
3144f97558eSzhanghailiang {
3154f97558eSzhanghailiang     int ret;
3164f97558eSzhanghailiang 
3174f97558eSzhanghailiang     if (msg >= COLO_MESSAGE__MAX) {
3184f97558eSzhanghailiang         error_setg(errp, "%s: Invalid message", __func__);
3194f97558eSzhanghailiang         return;
3204f97558eSzhanghailiang     }
3214f97558eSzhanghailiang     qemu_put_be32(f, msg);
322be07a0edSJuan Quintela     ret = qemu_fflush(f);
3234f97558eSzhanghailiang     if (ret < 0) {
3244f97558eSzhanghailiang         error_setg_errno(errp, -ret, "Can't send COLO message");
3254f97558eSzhanghailiang     }
326977c736fSMarkus Armbruster     trace_colo_send_message(COLOMessage_str(msg));
3274f97558eSzhanghailiang }
3284f97558eSzhanghailiang 
colo_send_message_value(QEMUFile * f,COLOMessage msg,uint64_t value,Error ** errp)329a91246c9Szhanghailiang static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
330a91246c9Szhanghailiang                                     uint64_t value, Error **errp)
331a91246c9Szhanghailiang {
332a91246c9Szhanghailiang     Error *local_err = NULL;
333a91246c9Szhanghailiang     int ret;
334a91246c9Szhanghailiang 
335a91246c9Szhanghailiang     colo_send_message(f, msg, &local_err);
336a91246c9Szhanghailiang     if (local_err) {
337a91246c9Szhanghailiang         error_propagate(errp, local_err);
338a91246c9Szhanghailiang         return;
339a91246c9Szhanghailiang     }
340a91246c9Szhanghailiang     qemu_put_be64(f, value);
341be07a0edSJuan Quintela     ret = qemu_fflush(f);
342a91246c9Szhanghailiang     if (ret < 0) {
343a91246c9Szhanghailiang         error_setg_errno(errp, -ret, "Failed to send value for message:%s",
344977c736fSMarkus Armbruster                          COLOMessage_str(msg));
345a91246c9Szhanghailiang     }
346a91246c9Szhanghailiang }
347a91246c9Szhanghailiang 
colo_receive_message(QEMUFile * f,Error ** errp)3484f97558eSzhanghailiang static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
3494f97558eSzhanghailiang {
3504f97558eSzhanghailiang     COLOMessage msg;
3514f97558eSzhanghailiang     int ret;
3524f97558eSzhanghailiang 
3534f97558eSzhanghailiang     msg = qemu_get_be32(f);
3544f97558eSzhanghailiang     ret = qemu_file_get_error(f);
3554f97558eSzhanghailiang     if (ret < 0) {
3564f97558eSzhanghailiang         error_setg_errno(errp, -ret, "Can't receive COLO message");
3574f97558eSzhanghailiang         return msg;
3584f97558eSzhanghailiang     }
3594f97558eSzhanghailiang     if (msg >= COLO_MESSAGE__MAX) {
3604f97558eSzhanghailiang         error_setg(errp, "%s: Invalid message", __func__);
3614f97558eSzhanghailiang         return msg;
3624f97558eSzhanghailiang     }
363977c736fSMarkus Armbruster     trace_colo_receive_message(COLOMessage_str(msg));
3644f97558eSzhanghailiang     return msg;
3654f97558eSzhanghailiang }
3664f97558eSzhanghailiang 
colo_receive_check_message(QEMUFile * f,COLOMessage expect_msg,Error ** errp)3674f97558eSzhanghailiang static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
3684f97558eSzhanghailiang                                        Error **errp)
3694f97558eSzhanghailiang {
3704f97558eSzhanghailiang     COLOMessage msg;
3714f97558eSzhanghailiang     Error *local_err = NULL;
3724f97558eSzhanghailiang 
3734f97558eSzhanghailiang     msg = colo_receive_message(f, &local_err);
3744f97558eSzhanghailiang     if (local_err) {
3754f97558eSzhanghailiang         error_propagate(errp, local_err);
3764f97558eSzhanghailiang         return;
3774f97558eSzhanghailiang     }
3784f97558eSzhanghailiang     if (msg != expect_msg) {
3794f97558eSzhanghailiang         error_setg(errp, "Unexpected COLO message %d, expected %d",
3804f97558eSzhanghailiang                           msg, expect_msg);
3814f97558eSzhanghailiang     }
3824f97558eSzhanghailiang }
3834f97558eSzhanghailiang 
colo_receive_message_value(QEMUFile * f,uint32_t expect_msg,Error ** errp)3844291d372Szhanghailiang static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
3854291d372Szhanghailiang                                            Error **errp)
3864291d372Szhanghailiang {
3874291d372Szhanghailiang     Error *local_err = NULL;
3884291d372Szhanghailiang     uint64_t value;
3894291d372Szhanghailiang     int ret;
3904291d372Szhanghailiang 
3914291d372Szhanghailiang     colo_receive_check_message(f, expect_msg, &local_err);
3924291d372Szhanghailiang     if (local_err) {
3934291d372Szhanghailiang         error_propagate(errp, local_err);
3944291d372Szhanghailiang         return 0;
3954291d372Szhanghailiang     }
3964291d372Szhanghailiang 
3974291d372Szhanghailiang     value = qemu_get_be64(f);
3984291d372Szhanghailiang     ret = qemu_file_get_error(f);
3994291d372Szhanghailiang     if (ret < 0) {
4004291d372Szhanghailiang         error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
401977c736fSMarkus Armbruster                          COLOMessage_str(expect_msg));
4024291d372Szhanghailiang     }
4034291d372Szhanghailiang     return value;
4044291d372Szhanghailiang }
4054291d372Szhanghailiang 
/*
 * Run one checkpoint transaction from the Primary side.
 *
 * Sequence: request a checkpoint and wait for the reply, stop the VM,
 * checkpoint replication, save device state into the buffer behind @fb,
 * send live state and then the buffered device state to the Secondary,
 * wait for the Secondary to receive and load it, and restart the VM.
 *
 * @s: migration state providing to_dst_file and rp_state.from_dst_file.
 * @bioc: channel buffer that collects the device state; it is rewound
 *        and its usage reset at the start of every transaction.
 * @fb: QEMUFile used to write the device state.
 *
 * Returns 0 on success and a negative value on any failure or when a
 * failover request is detected.  Errors are reported here, not
 * propagated.
 */
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                    COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }
    /* Reset channel-buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    bql_lock();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        bql_unlock();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    bql_unlock();
    trace_colo_vm_state_change("run", "stop");
    /*
     * Failover request bh could be called after vm_stop_force_state(),
     * so we need to check the failover state again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }
    bql_lock();

    replication_do_checkpoint_all(&local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        bql_unlock();
        goto out;
    }
    /* Note: device state is saved into buffer */
    ret = qemu_save_device_state(fb);

    bql_unlock();
    if (ret < 0) {
        goto out;
    }

    if (migrate_auto_converge()) {
        mig_throttle_counter_reset();
    }
    /*
     * Only save VM's live state, which does not include device state.
     * TODO: We may need a timeout mechanism to prevent COLO process
     * to be blocked here.
     */
    qemu_savevm_live_state(s->to_dst_file);

    qemu_fflush(fb);

    /*
     * We need the size of the VMstate data in Secondary side,
     * with which we can decide how much data should be read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    ret = qemu_fflush(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_event_reset(&s->colo_checkpoint_event);
    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    bql_lock();
    vm_start();
    bql_unlock();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
        /*
         * 'ret' may already hold a non-negative value from
         * qemu_save_device_state() or qemu_fflush() when a later step
         * failed; make sure an error is never returned as success.
         */
        ret = -1;
    }
    return ret;
}
5234f97558eSzhanghailiang 
/*
 * Notifier callback (installed on packets_compare_notifier) that
 * requests a checkpoint.  @notifier and @data are not used.
 */
static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
{
    colo_checkpoint_notify();
}
528131b2153SZhang Chen 
colo_process_checkpoint(MigrationState * s)5290b827d5eSzhanghailiang static void colo_process_checkpoint(MigrationState *s)
5300b827d5eSzhanghailiang {
531a91246c9Szhanghailiang     QIOChannelBuffer *bioc;
532a91246c9Szhanghailiang     QEMUFile *fb = NULL;
5334f97558eSzhanghailiang     Error *local_err = NULL;
5344f97558eSzhanghailiang     int ret;
5354f97558eSzhanghailiang 
5362b9f6bf3SRao, Lei     if (get_colo_mode() != COLO_MODE_PRIMARY) {
5375ed0decaSZhang Chen         error_report("COLO mode must be COLO_MODE_PRIMARY");
5385ed0decaSZhang Chen         return;
5395ed0decaSZhang Chen     }
5405ed0decaSZhang Chen 
541aef06085Szhanghailiang     failover_init_state();
542aef06085Szhanghailiang 
54356ba83d2Szhanghailiang     s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
54456ba83d2Szhanghailiang     if (!s->rp_state.from_dst_file) {
54556ba83d2Szhanghailiang         error_report("Open QEMUFile from_dst_file failed");
54656ba83d2Szhanghailiang         goto out;
54756ba83d2Szhanghailiang     }
54856ba83d2Szhanghailiang 
549131b2153SZhang Chen     packets_compare_notifier.notify = colo_compare_notify_checkpoint;
550131b2153SZhang Chen     colo_compare_register_notifier(&packets_compare_notifier);
551131b2153SZhang Chen 
5524f97558eSzhanghailiang     /*
5534f97558eSzhanghailiang      * Wait for Secondary finish loading VM states and enter COLO
5544f97558eSzhanghailiang      * restore.
5554f97558eSzhanghailiang      */
5564f97558eSzhanghailiang     colo_receive_check_message(s->rp_state.from_dst_file,
5574f97558eSzhanghailiang                        COLO_MESSAGE_CHECKPOINT_READY, &local_err);
5584f97558eSzhanghailiang     if (local_err) {
5594f97558eSzhanghailiang         goto out;
5604f97558eSzhanghailiang     }
561a91246c9Szhanghailiang     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
56277ef2dc1SDaniel P. Berrangé     fb = qemu_file_new_output(QIO_CHANNEL(bioc));
563a91246c9Szhanghailiang     object_unref(OBJECT(bioc));
5644f97558eSzhanghailiang 
565195801d7SStefan Hajnoczi     bql_lock();
5668e48ac95SZhang Chen     replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
5678e48ac95SZhang Chen     if (local_err) {
568195801d7SStefan Hajnoczi         bql_unlock();
5698e48ac95SZhang Chen         goto out;
5708e48ac95SZhang Chen     }
5718e48ac95SZhang Chen 
5720b827d5eSzhanghailiang     vm_start();
573195801d7SStefan Hajnoczi     bql_unlock();
5740b827d5eSzhanghailiang     trace_colo_vm_state_change("stop", "run");
5750b827d5eSzhanghailiang 
5760e0f0479SZhang Chen     timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
577f94a858fSJuan Quintela               migrate_checkpoint_delay());
578479125d5Szhanghailiang 
5794f97558eSzhanghailiang     while (s->state == MIGRATION_STATUS_COLO) {
580b3f7f0c5Szhanghailiang         if (failover_get_state() != FAILOVER_STATUS_NONE) {
581b3f7f0c5Szhanghailiang             error_report("failover request");
582b3f7f0c5Szhanghailiang             goto out;
583b3f7f0c5Szhanghailiang         }
584b3f7f0c5Szhanghailiang 
585bb70b66eSLukas Straub         qemu_event_wait(&s->colo_checkpoint_event);
58618cc23d7Szhanghailiang 
5872518aec1Szhanghailiang         if (s->state != MIGRATION_STATUS_COLO) {
5882518aec1Szhanghailiang             goto out;
5892518aec1Szhanghailiang         }
590a91246c9Szhanghailiang         ret = colo_do_checkpoint_transaction(s, bioc, fb);
5914f97558eSzhanghailiang         if (ret < 0) {
5924f97558eSzhanghailiang             goto out;
5934f97558eSzhanghailiang         }
5944f97558eSzhanghailiang     }
5950b827d5eSzhanghailiang 
59656ba83d2Szhanghailiang out:
5974f97558eSzhanghailiang     /* Throw the unreported error message after exited from loop */
5984f97558eSzhanghailiang     if (local_err) {
5994f97558eSzhanghailiang         error_report_err(local_err);
6004f97558eSzhanghailiang     }
6014f97558eSzhanghailiang 
602a91246c9Szhanghailiang     if (fb) {
603a91246c9Szhanghailiang         qemu_fclose(fb);
604a91246c9Szhanghailiang     }
605a91246c9Szhanghailiang 
6069ecff6d6Szhanghailiang     /*
6079ecff6d6Szhanghailiang      * There are only two reasons we can get here, some error happened
6089ecff6d6Szhanghailiang      * or the user triggered failover.
6099ecff6d6Szhanghailiang      */
6109ecff6d6Szhanghailiang     switch (failover_get_state()) {
6111fe6ab26SZhang Chen     case FAILOVER_STATUS_COMPLETED:
6129ecff6d6Szhanghailiang         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
6139ecff6d6Szhanghailiang                                   COLO_EXIT_REASON_REQUEST);
6149ecff6d6Szhanghailiang         break;
6159ecff6d6Szhanghailiang     default:
6163a43ac47SZhang Chen         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
6173a43ac47SZhang Chen                                   COLO_EXIT_REASON_ERROR);
6189ecff6d6Szhanghailiang     }
6199ecff6d6Szhanghailiang 
620c937b9a6Szhanghailiang     /* Hope this not to be too long to wait here */
621c937b9a6Szhanghailiang     qemu_sem_wait(&s->colo_exit_sem);
622c937b9a6Szhanghailiang     qemu_sem_destroy(&s->colo_exit_sem);
623131b2153SZhang Chen 
624131b2153SZhang Chen     /*
625131b2153SZhang Chen      * It is safe to unregister notifier after failover finished.
626131b2153SZhang Chen      * Besides, colo_delay_timer and colo_checkpoint_sem can't be
6273a4452d8Szhaolichang      * released before unregister notifier, or there will be use-after-free
628131b2153SZhang Chen      * error.
629131b2153SZhang Chen      */
630131b2153SZhang Chen     colo_compare_unregister_notifier(&packets_compare_notifier);
631131b2153SZhang Chen     timer_free(s->colo_delay_timer);
632bb70b66eSLukas Straub     qemu_event_destroy(&s->colo_checkpoint_event);
633131b2153SZhang Chen 
634c937b9a6Szhanghailiang     /*
635c937b9a6Szhanghailiang      * Must be called after failover BH is completed,
636c937b9a6Szhanghailiang      * Or the failover BH may shutdown the wrong fd that
637c937b9a6Szhanghailiang      * re-used by other threads after we release here.
638c937b9a6Szhanghailiang      */
63956ba83d2Szhanghailiang     if (s->rp_state.from_dst_file) {
64056ba83d2Szhanghailiang         qemu_fclose(s->rp_state.from_dst_file);
641ac183dacSRao, Lei         s->rp_state.from_dst_file = NULL;
64256ba83d2Szhanghailiang     }
6430b827d5eSzhanghailiang }
6440b827d5eSzhanghailiang 
/*
 * Run the Primary-side COLO checkpoint process for migration state @s.
 *
 * The BQL is released on entry and re-acquired before returning.  The
 * checkpoint event, the checkpoint delay timer and the exit semaphore are
 * created here, before colo_process_checkpoint() is entered.
 */
void migrate_start_colo_process(MigrationState *s)
{
    bql_unlock();

    /* Synchronisation objects used by the checkpoint loop. */
    qemu_sem_init(&s->colo_exit_sem, 0);
    qemu_event_init(&s->colo_checkpoint_event, false);

    /* Periodic checkpoint trigger, driven by the host clock. */
    s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
                                       colo_checkpoint_notify_timer, NULL);

    colo_process_checkpoint(s);

    bql_lock();
}
65625d0c16fSzhanghailiang 
colo_incoming_process_checkpoint(MigrationIncomingState * mis,QEMUFile * fb,QIOChannelBuffer * bioc,Error ** errp)6576ad8ad38Szhanghailiang static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
6586ad8ad38Szhanghailiang                       QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
6596ad8ad38Szhanghailiang {
6606ad8ad38Szhanghailiang     uint64_t total_size;
6616ad8ad38Szhanghailiang     uint64_t value;
6626ad8ad38Szhanghailiang     Error *local_err = NULL;
6636ad8ad38Szhanghailiang     int ret;
6646ad8ad38Szhanghailiang 
665195801d7SStefan Hajnoczi     bql_lock();
6666ad8ad38Szhanghailiang     vm_stop_force_state(RUN_STATE_COLO);
667195801d7SStefan Hajnoczi     bql_unlock();
6689c5c8ff2SRao, Lei     trace_colo_vm_state_change("run", "stop");
6696ad8ad38Szhanghailiang 
6706ad8ad38Szhanghailiang     /* FIXME: This is unnecessary for periodic checkpoint mode */
6716ad8ad38Szhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
6726ad8ad38Szhanghailiang                  &local_err);
6736ad8ad38Szhanghailiang     if (local_err) {
6746ad8ad38Szhanghailiang         error_propagate(errp, local_err);
6756ad8ad38Szhanghailiang         return;
6766ad8ad38Szhanghailiang     }
6776ad8ad38Szhanghailiang 
6786ad8ad38Szhanghailiang     colo_receive_check_message(mis->from_src_file,
6796ad8ad38Szhanghailiang                        COLO_MESSAGE_VMSTATE_SEND, &local_err);
6806ad8ad38Szhanghailiang     if (local_err) {
6816ad8ad38Szhanghailiang         error_propagate(errp, local_err);
6826ad8ad38Szhanghailiang         return;
6836ad8ad38Szhanghailiang     }
6846ad8ad38Szhanghailiang 
685195801d7SStefan Hajnoczi     bql_lock();
686786d8b8eSLukas Straub     cpu_synchronize_all_states();
6876ad8ad38Szhanghailiang     ret = qemu_loadvm_state_main(mis->from_src_file, mis);
688195801d7SStefan Hajnoczi     bql_unlock();
6896ad8ad38Szhanghailiang 
6906ad8ad38Szhanghailiang     if (ret < 0) {
6916ad8ad38Szhanghailiang         error_setg(errp, "Load VM's live state (ram) error");
6926ad8ad38Szhanghailiang         return;
6936ad8ad38Szhanghailiang     }
6946ad8ad38Szhanghailiang 
6956ad8ad38Szhanghailiang     value = colo_receive_message_value(mis->from_src_file,
6966ad8ad38Szhanghailiang                              COLO_MESSAGE_VMSTATE_SIZE, &local_err);
6976ad8ad38Szhanghailiang     if (local_err) {
6986ad8ad38Szhanghailiang         error_propagate(errp, local_err);
6996ad8ad38Szhanghailiang         return;
7006ad8ad38Szhanghailiang     }
7016ad8ad38Szhanghailiang 
7026ad8ad38Szhanghailiang     /*
7036ad8ad38Szhanghailiang      * Read VM device state data into channel buffer,
7046ad8ad38Szhanghailiang      * It's better to re-use the memory allocated.
7056ad8ad38Szhanghailiang      * Here we need to handle the channel buffer directly.
7066ad8ad38Szhanghailiang      */
7076ad8ad38Szhanghailiang     if (value > bioc->capacity) {
7086ad8ad38Szhanghailiang         bioc->capacity = value;
7096ad8ad38Szhanghailiang         bioc->data = g_realloc(bioc->data, bioc->capacity);
7106ad8ad38Szhanghailiang     }
7116ad8ad38Szhanghailiang     total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
7126ad8ad38Szhanghailiang     if (total_size != value) {
7136ad8ad38Szhanghailiang         error_setg(errp, "Got %" PRIu64 " VMState data, less than expected"
7146ad8ad38Szhanghailiang                     " %" PRIu64, total_size, value);
7156ad8ad38Szhanghailiang         return;
7166ad8ad38Szhanghailiang     }
7176ad8ad38Szhanghailiang     bioc->usage = total_size;
7186ad8ad38Szhanghailiang     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
7196ad8ad38Szhanghailiang 
7206ad8ad38Szhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
7216ad8ad38Szhanghailiang                  &local_err);
7226ad8ad38Szhanghailiang     if (local_err) {
7236ad8ad38Szhanghailiang         error_propagate(errp, local_err);
7246ad8ad38Szhanghailiang         return;
7256ad8ad38Szhanghailiang     }
7266ad8ad38Szhanghailiang 
727195801d7SStefan Hajnoczi     bql_lock();
7286ad8ad38Szhanghailiang     vmstate_loading = true;
72924fa16f8SLukas Straub     colo_flush_ram_cache();
7306ad8ad38Szhanghailiang     ret = qemu_load_device_state(fb);
7316ad8ad38Szhanghailiang     if (ret < 0) {
7326ad8ad38Szhanghailiang         error_setg(errp, "COLO: load device state failed");
73392c932deSLukas Straub         vmstate_loading = false;
734195801d7SStefan Hajnoczi         bql_unlock();
7356ad8ad38Szhanghailiang         return;
7366ad8ad38Szhanghailiang     }
7376ad8ad38Szhanghailiang 
7386ad8ad38Szhanghailiang     replication_get_error_all(&local_err);
7396ad8ad38Szhanghailiang     if (local_err) {
7406ad8ad38Szhanghailiang         error_propagate(errp, local_err);
74192c932deSLukas Straub         vmstate_loading = false;
742195801d7SStefan Hajnoczi         bql_unlock();
7436ad8ad38Szhanghailiang         return;
7446ad8ad38Szhanghailiang     }
7456ad8ad38Szhanghailiang 
7466ad8ad38Szhanghailiang     /* discard colo disk buffer */
7476ad8ad38Szhanghailiang     replication_do_checkpoint_all(&local_err);
7486ad8ad38Szhanghailiang     if (local_err) {
7496ad8ad38Szhanghailiang         error_propagate(errp, local_err);
75092c932deSLukas Straub         vmstate_loading = false;
751195801d7SStefan Hajnoczi         bql_unlock();
7526ad8ad38Szhanghailiang         return;
7536ad8ad38Szhanghailiang     }
7546ad8ad38Szhanghailiang     /* Notify all filters of all NIC to do checkpoint */
7556ad8ad38Szhanghailiang     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
7566ad8ad38Szhanghailiang 
7576ad8ad38Szhanghailiang     if (local_err) {
7586ad8ad38Szhanghailiang         error_propagate(errp, local_err);
75992c932deSLukas Straub         vmstate_loading = false;
760195801d7SStefan Hajnoczi         bql_unlock();
7616ad8ad38Szhanghailiang         return;
7626ad8ad38Szhanghailiang     }
7636ad8ad38Szhanghailiang 
7646ad8ad38Szhanghailiang     vmstate_loading = false;
7656ad8ad38Szhanghailiang     vm_start();
766195801d7SStefan Hajnoczi     bql_unlock();
7679c5c8ff2SRao, Lei     trace_colo_vm_state_change("stop", "run");
7686ad8ad38Szhanghailiang 
7696ad8ad38Szhanghailiang     if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
7706ad8ad38Szhanghailiang         return;
7716ad8ad38Szhanghailiang     }
7726ad8ad38Szhanghailiang 
7736ad8ad38Szhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
7746ad8ad38Szhanghailiang                  &local_err);
7756ad8ad38Szhanghailiang     error_propagate(errp, local_err);
7766ad8ad38Szhanghailiang }
7776ad8ad38Szhanghailiang 
colo_wait_handle_message(MigrationIncomingState * mis,QEMUFile * fb,QIOChannelBuffer * bioc,Error ** errp)7786ad8ad38Szhanghailiang static void colo_wait_handle_message(MigrationIncomingState *mis,
7796ad8ad38Szhanghailiang                 QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
7804f97558eSzhanghailiang {
7814f97558eSzhanghailiang     COLOMessage msg;
7824f97558eSzhanghailiang     Error *local_err = NULL;
7834f97558eSzhanghailiang 
7846ad8ad38Szhanghailiang     msg = colo_receive_message(mis->from_src_file, &local_err);
7854f97558eSzhanghailiang     if (local_err) {
7864f97558eSzhanghailiang         error_propagate(errp, local_err);
7874f97558eSzhanghailiang         return;
7884f97558eSzhanghailiang     }
7894f97558eSzhanghailiang 
7904f97558eSzhanghailiang     switch (msg) {
7914f97558eSzhanghailiang     case COLO_MESSAGE_CHECKPOINT_REQUEST:
7926ad8ad38Szhanghailiang         colo_incoming_process_checkpoint(mis, fb, bioc, errp);
7934f97558eSzhanghailiang         break;
7944f97558eSzhanghailiang     default:
7954f97558eSzhanghailiang         error_setg(errp, "Got unknown COLO message: %d", msg);
7964f97558eSzhanghailiang         break;
7974f97558eSzhanghailiang     }
7984f97558eSzhanghailiang }
7994f97558eSzhanghailiang 
colo_shutdown(void)800795969abSRao, Lei void colo_shutdown(void)
801795969abSRao, Lei {
802795969abSRao, Lei     MigrationIncomingState *mis = NULL;
803795969abSRao, Lei     MigrationState *s = NULL;
804795969abSRao, Lei 
805795969abSRao, Lei     switch (get_colo_mode()) {
806795969abSRao, Lei     case COLO_MODE_PRIMARY:
807795969abSRao, Lei         s = migrate_get_current();
808795969abSRao, Lei         qemu_event_set(&s->colo_checkpoint_event);
809795969abSRao, Lei         qemu_sem_post(&s->colo_exit_sem);
810795969abSRao, Lei         break;
811795969abSRao, Lei     case COLO_MODE_SECONDARY:
812795969abSRao, Lei         mis = migration_incoming_get_current();
813795969abSRao, Lei         qemu_sem_post(&mis->colo_incoming_sem);
814795969abSRao, Lei         break;
815795969abSRao, Lei     default:
816795969abSRao, Lei         break;
817795969abSRao, Lei     }
818795969abSRao, Lei }
819795969abSRao, Lei 
colo_process_incoming_thread(void * opaque)820d0a14a2bSVladimir Sementsov-Ogievskiy static void *colo_process_incoming_thread(void *opaque)
82125d0c16fSzhanghailiang {
82225d0c16fSzhanghailiang     MigrationIncomingState *mis = opaque;
8234291d372Szhanghailiang     QEMUFile *fb = NULL;
8244291d372Szhanghailiang     QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
8254f97558eSzhanghailiang     Error *local_err = NULL;
82625d0c16fSzhanghailiang 
82774637e6fSLidong Chen     rcu_register_thread();
828c937b9a6Szhanghailiang     qemu_sem_init(&mis->colo_incoming_sem, 0);
829c937b9a6Szhanghailiang 
83025d0c16fSzhanghailiang     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
83125d0c16fSzhanghailiang                       MIGRATION_STATUS_COLO);
83225d0c16fSzhanghailiang 
8332b9f6bf3SRao, Lei     if (get_colo_mode() != COLO_MODE_SECONDARY) {
8345ed0decaSZhang Chen         error_report("COLO mode must be COLO_MODE_SECONDARY");
8355ed0decaSZhang Chen         return NULL;
8365ed0decaSZhang Chen     }
8375ed0decaSZhang Chen 
8382cc637f1SLi Zhijian     /* Make sure all file formats throw away their mutable metadata */
8392cc637f1SLi Zhijian     bql_lock();
8402cc637f1SLi Zhijian     bdrv_activate_all(&local_err);
8412cc637f1SLi Zhijian     if (local_err) {
8422cc637f1SLi Zhijian         bql_unlock();
8432cc637f1SLi Zhijian         error_report_err(local_err);
8442cc637f1SLi Zhijian         return NULL;
8452cc637f1SLi Zhijian     }
8462cc637f1SLi Zhijian     bql_unlock();
8472cc637f1SLi Zhijian 
848aef06085Szhanghailiang     failover_init_state();
849aef06085Szhanghailiang 
85056ba83d2Szhanghailiang     mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
85156ba83d2Szhanghailiang     if (!mis->to_src_file) {
85256ba83d2Szhanghailiang         error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
85356ba83d2Szhanghailiang         goto out;
85456ba83d2Szhanghailiang     }
85556ba83d2Szhanghailiang     /*
85656ba83d2Szhanghailiang      * Note: the communication between Primary side and Secondary side
85756ba83d2Szhanghailiang      * should be sequential, we set the fd to unblocked in migration incoming
85856ba83d2Szhanghailiang      * coroutine, and here we are in the COLO incoming thread, so it is ok to
85956ba83d2Szhanghailiang      * set the fd back to blocked.
86056ba83d2Szhanghailiang      */
86156ba83d2Szhanghailiang     qemu_file_set_blocking(mis->from_src_file, true);
86256ba83d2Szhanghailiang 
8630393031aSzhanghailiang     colo_incoming_start_dirty_log();
8640393031aSzhanghailiang 
8654291d372Szhanghailiang     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
86677ef2dc1SDaniel P. Berrangé     fb = qemu_file_new_input(QIO_CHANNEL(bioc));
8674291d372Szhanghailiang     object_unref(OBJECT(bioc));
8684291d372Szhanghailiang 
869195801d7SStefan Hajnoczi     bql_lock();
8708e48ac95SZhang Chen     replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
8718e48ac95SZhang Chen     if (local_err) {
872195801d7SStefan Hajnoczi         bql_unlock();
8738e48ac95SZhang Chen         goto out;
8748e48ac95SZhang Chen     }
875131b2153SZhang Chen     vm_start();
876195801d7SStefan Hajnoczi     bql_unlock();
8779c5c8ff2SRao, Lei     trace_colo_vm_state_change("stop", "run");
878131b2153SZhang Chen 
8794f97558eSzhanghailiang     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
8804f97558eSzhanghailiang                       &local_err);
8814f97558eSzhanghailiang     if (local_err) {
8824f97558eSzhanghailiang         goto out;
8834f97558eSzhanghailiang     }
8844f97558eSzhanghailiang 
8854f97558eSzhanghailiang     while (mis->state == MIGRATION_STATUS_COLO) {
8866ad8ad38Szhanghailiang         colo_wait_handle_message(mis, fb, bioc, &local_err);
8874f97558eSzhanghailiang         if (local_err) {
8886ad8ad38Szhanghailiang             error_report_err(local_err);
8896ad8ad38Szhanghailiang             break;
8904f97558eSzhanghailiang         }
89192c932deSLukas Straub 
89292c932deSLukas Straub         if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
89392c932deSLukas Straub             failover_set_state(FAILOVER_STATUS_RELAUNCH,
89492c932deSLukas Straub                             FAILOVER_STATUS_NONE);
89592c932deSLukas Straub             failover_request_active(NULL);
89692c932deSLukas Straub             break;
89792c932deSLukas Straub         }
89892c932deSLukas Straub 
8999d2db376Szhanghailiang         if (failover_get_state() != FAILOVER_STATUS_NONE) {
9009d2db376Szhanghailiang             error_report("failover request");
9016ad8ad38Szhanghailiang             break;
9024f97558eSzhanghailiang         }
9034f97558eSzhanghailiang     }
90425d0c16fSzhanghailiang 
90556ba83d2Szhanghailiang out:
9063a43ac47SZhang Chen     /*
9073a43ac47SZhang Chen      * There are only two reasons we can get here, some error happened
9083a43ac47SZhang Chen      * or the user triggered failover.
9093a43ac47SZhang Chen      */
9109ecff6d6Szhanghailiang     switch (failover_get_state()) {
9111fe6ab26SZhang Chen     case FAILOVER_STATUS_COMPLETED:
9129ecff6d6Szhanghailiang         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
9139ecff6d6Szhanghailiang                                   COLO_EXIT_REASON_REQUEST);
9149ecff6d6Szhanghailiang         break;
9159ecff6d6Szhanghailiang     default:
9163a43ac47SZhang Chen         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
9173a43ac47SZhang Chen                                   COLO_EXIT_REASON_ERROR);
9189ecff6d6Szhanghailiang     }
9199ecff6d6Szhanghailiang 
9204291d372Szhanghailiang     if (fb) {
9214291d372Szhanghailiang         qemu_fclose(fb);
9224291d372Szhanghailiang     }
9234291d372Szhanghailiang 
924c937b9a6Szhanghailiang     /* Hope this not to be too long to loop here */
925c937b9a6Szhanghailiang     qemu_sem_wait(&mis->colo_incoming_sem);
926c937b9a6Szhanghailiang     qemu_sem_destroy(&mis->colo_incoming_sem);
92725d0c16fSzhanghailiang 
92874637e6fSLidong Chen     rcu_unregister_thread();
92925d0c16fSzhanghailiang     return NULL;
93025d0c16fSzhanghailiang }
931d0a14a2bSVladimir Sementsov-Ogievskiy 
colo_incoming_co(void)932d0a14a2bSVladimir Sementsov-Ogievskiy int coroutine_fn colo_incoming_co(void)
933d0a14a2bSVladimir Sementsov-Ogievskiy {
934d0a14a2bSVladimir Sementsov-Ogievskiy     MigrationIncomingState *mis = migration_incoming_get_current();
935d0a14a2bSVladimir Sementsov-Ogievskiy     QemuThread th;
936d0a14a2bSVladimir Sementsov-Ogievskiy 
937195801d7SStefan Hajnoczi     assert(bql_locked());
938d0a14a2bSVladimir Sementsov-Ogievskiy 
939d0a14a2bSVladimir Sementsov-Ogievskiy     if (!migration_incoming_colo_enabled()) {
940d0a14a2bSVladimir Sementsov-Ogievskiy         return 0;
941d0a14a2bSVladimir Sementsov-Ogievskiy     }
942d0a14a2bSVladimir Sementsov-Ogievskiy 
943d0a14a2bSVladimir Sementsov-Ogievskiy     qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread,
944d0a14a2bSVladimir Sementsov-Ogievskiy                        mis, QEMU_THREAD_JOINABLE);
945d0a14a2bSVladimir Sementsov-Ogievskiy 
946d0a14a2bSVladimir Sementsov-Ogievskiy     mis->colo_incoming_co = qemu_coroutine_self();
947d0a14a2bSVladimir Sementsov-Ogievskiy     qemu_coroutine_yield();
948d0a14a2bSVladimir Sementsov-Ogievskiy     mis->colo_incoming_co = NULL;
949d0a14a2bSVladimir Sementsov-Ogievskiy 
950195801d7SStefan Hajnoczi     bql_unlock();
951d0a14a2bSVladimir Sementsov-Ogievskiy     /* Wait checkpoint incoming thread exit before free resource */
952d0a14a2bSVladimir Sementsov-Ogievskiy     qemu_thread_join(&th);
953195801d7SStefan Hajnoczi     bql_lock();
954d0a14a2bSVladimir Sementsov-Ogievskiy 
955a4a411fbSStefan Hajnoczi     /* We hold the global BQL, so it is safe here */
956d0a14a2bSVladimir Sementsov-Ogievskiy     colo_release_ram_cache();
957d0a14a2bSVladimir Sementsov-Ogievskiy 
958d0a14a2bSVladimir Sementsov-Ogievskiy     return 0;
959d0a14a2bSVladimir Sementsov-Ogievskiy }
960