xref: /qemu/migration/savevm.c (revision 7a4e543d)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <quintela@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "qemu-common.h"
31 #include "hw/boards.h"
32 #include "hw/hw.h"
33 #include "hw/qdev.h"
34 #include "net/net.h"
35 #include "monitor/monitor.h"
36 #include "sysemu/sysemu.h"
37 #include "qemu/timer.h"
38 #include "audio/audio.h"
39 #include "migration/migration.h"
40 #include "migration/postcopy-ram.h"
41 #include "qapi/qmp/qerror.h"
42 #include "qemu/error-report.h"
43 #include "qemu/sockets.h"
44 #include "qemu/queue.h"
45 #include "sysemu/cpus.h"
46 #include "exec/memory.h"
47 #include "qmp-commands.h"
48 #include "trace.h"
49 #include "qemu/bitops.h"
50 #include "qemu/iov.h"
51 #include "block/snapshot.h"
52 #include "block/qapi.h"
53 
54 
55 #ifndef ETH_P_RARP
56 #define ETH_P_RARP 0x8035
57 #endif
58 #define ARP_HTYPE_ETH 0x0001
59 #define ARP_PTYPE_IP 0x0800
60 #define ARP_OP_REQUEST_REV 0x3
61 
62 const unsigned int postcopy_ram_discard_version = 0;
63 
64 static bool skip_section_footers;
65 
66 static struct mig_cmd_args {
67     ssize_t     len; /* -1 = variable */
68     const char *name;
69 } mig_cmd_args[] = {
70     [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
71     [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
72     [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
73     [MIG_CMD_POSTCOPY_ADVISE]  = { .len = 16, .name = "POSTCOPY_ADVISE" },
74     [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
75     [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
76     [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
77                                    .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
78     [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
79     [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
80 };
81 
82 static int announce_self_create(uint8_t *buf,
83                                 uint8_t *mac_addr)
84 {
85     /* Ethernet header. */
86     memset(buf, 0xff, 6);         /* destination MAC addr */
87     memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
88     *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
89 
90     /* RARP header. */
91     *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
92     *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
93     *(buf + 18) = 6; /* hardware addr length (ethernet) */
94     *(buf + 19) = 4; /* protocol addr length (IPv4) */
95     *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
96     memcpy(buf + 22, mac_addr, 6); /* source hw addr */
97     memset(buf + 28, 0x00, 4);     /* source protocol addr */
98     memcpy(buf + 32, mac_addr, 6); /* target hw addr */
99     memset(buf + 38, 0x00, 4);     /* target protocol addr */
100 
101     /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
102     memset(buf + 42, 0x00, 18);
103 
104     return 60; /* len (FCS will be added by hardware) */
105 }
106 
107 static void qemu_announce_self_iter(NICState *nic, void *opaque)
108 {
109     uint8_t buf[60];
110     int len;
111 
112     trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
113     len = announce_self_create(buf, nic->conf->macaddr.a);
114 
115     qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
116 }
117 
118 
119 static void qemu_announce_self_once(void *opaque)
120 {
121     static int count = SELF_ANNOUNCE_ROUNDS;
122     QEMUTimer *timer = *(QEMUTimer **)opaque;
123 
124     qemu_foreach_nic(qemu_announce_self_iter, NULL);
125 
126     if (--count) {
127         /* delay 50ms, 150ms, 250ms, ... */
128         timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
129                   self_announce_delay(count));
130     } else {
131             timer_del(timer);
132             timer_free(timer);
133     }
134 }
135 
136 void qemu_announce_self(void)
137 {
138     static QEMUTimer *timer;
139     timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_announce_self_once, &timer);
140     qemu_announce_self_once(&timer);
141 }
142 
143 /***********************************************************/
144 /* savevm/loadvm support */
145 
146 static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
147                                    int64_t pos)
148 {
149     int ret;
150     QEMUIOVector qiov;
151 
152     qemu_iovec_init_external(&qiov, iov, iovcnt);
153     ret = bdrv_writev_vmstate(opaque, &qiov, pos);
154     if (ret < 0) {
155         return ret;
156     }
157 
158     return qiov.size;
159 }
160 
/*
 * QEMUFileOps.put_buffer backend: write @size bytes from @buf to the VM
 * state area of the block device @opaque at offset @pos.
 *
 * Returns @size on success, or the negative value returned by
 * bdrv_save_vmstate() on failure so that the error reaches the caller
 * instead of being reported as a successful write (this matches
 * block_writev_buffer()).
 */
static ssize_t block_put_buffer(void *opaque, const uint8_t *buf,
                                int64_t pos, size_t size)
{
    int ret;

    ret = bdrv_save_vmstate(opaque, buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    return size;
}
167 
/*
 * QEMUFileOps.get_buffer backend: read up to @size bytes into @buf from the
 * VM state area of the block device @opaque at offset @pos.
 *
 * Returns whatever bdrv_load_vmstate() returns.
 */
static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                size_t size)
{
    ssize_t nread = bdrv_load_vmstate(opaque, buf, pos, size);

    return nread;
}
173 
/*
 * QEMUFileOps.close backend: flush the block device @opaque and return the
 * result of bdrv_flush().
 */
static int bdrv_fclose(void *opaque)
{
    int rc = bdrv_flush(opaque);

    return rc;
}
178 
179 static const QEMUFileOps bdrv_read_ops = {
180     .get_buffer = block_get_buffer,
181     .close =      bdrv_fclose
182 };
183 
184 static const QEMUFileOps bdrv_write_ops = {
185     .put_buffer     = block_put_buffer,
186     .writev_buffer  = block_writev_buffer,
187     .close          = bdrv_fclose
188 };
189 
190 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
191 {
192     if (is_writable) {
193         return qemu_fopen_ops(bs, &bdrv_write_ops);
194     }
195     return qemu_fopen_ops(bs, &bdrv_read_ops);
196 }
197 
198 
199 /* QEMUFile timer support.
200  * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
201  */
202 
203 void timer_put(QEMUFile *f, QEMUTimer *ts)
204 {
205     uint64_t expire_time;
206 
207     expire_time = timer_expire_time_ns(ts);
208     qemu_put_be64(f, expire_time);
209 }
210 
211 void timer_get(QEMUFile *f, QEMUTimer *ts)
212 {
213     uint64_t expire_time;
214 
215     expire_time = qemu_get_be64(f);
216     if (expire_time != -1) {
217         timer_mod_ns(ts, expire_time);
218     } else {
219         timer_del(ts);
220     }
221 }
222 
223 
224 /* VMState timer support.
225  * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
226  */
227 
228 static int get_timer(QEMUFile *f, void *pv, size_t size)
229 {
230     QEMUTimer *v = pv;
231     timer_get(f, v);
232     return 0;
233 }
234 
235 static void put_timer(QEMUFile *f, void *pv, size_t size)
236 {
237     QEMUTimer *v = pv;
238     timer_put(f, v);
239 }
240 
241 const VMStateInfo vmstate_info_timer = {
242     .name = "timer",
243     .get  = get_timer,
244     .put  = put_timer,
245 };
246 
247 
typedef struct CompatEntry CompatEntry;

/*
 * Compatibility identity of a save-state entry: the bare name and instance
 * id the entry would have without the device-path prefix.
 */
struct CompatEntry {
    char idstr[256];    /* name without the device path prefix */
    int instance_id;    /* instance id paired with that name */
};
252 
253 typedef struct SaveStateEntry {
254     QTAILQ_ENTRY(SaveStateEntry) entry;
255     char idstr[256];
256     int instance_id;
257     int alias_id;
258     int version_id;
259     int section_id;
260     SaveVMHandlers *ops;
261     const VMStateDescription *vmsd;
262     void *opaque;
263     CompatEntry *compat;
264     int is_ram;
265 } SaveStateEntry;
266 
267 typedef struct SaveState {
268     QTAILQ_HEAD(, SaveStateEntry) handlers;
269     int global_section_id;
270     bool skip_configuration;
271     uint32_t len;
272     const char *name;
273 } SaveState;
274 
275 static SaveState savevm_state = {
276     .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
277     .global_section_id = 0,
278     .skip_configuration = false,
279 };
280 
281 void savevm_skip_configuration(void)
282 {
283     savevm_state.skip_configuration = true;
284 }
285 
286 
287 static void configuration_pre_save(void *opaque)
288 {
289     SaveState *state = opaque;
290     const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
291 
292     state->len = strlen(current_name);
293     state->name = current_name;
294 }
295 
296 static int configuration_post_load(void *opaque, int version_id)
297 {
298     SaveState *state = opaque;
299     const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
300 
301     if (strncmp(state->name, current_name, state->len) != 0) {
302         error_report("Machine type received is '%.*s' and local is '%s'",
303                      (int) state->len, state->name, current_name);
304         return -EINVAL;
305     }
306     return 0;
307 }
308 
309 static const VMStateDescription vmstate_configuration = {
310     .name = "configuration",
311     .version_id = 1,
312     .post_load = configuration_post_load,
313     .pre_save = configuration_pre_save,
314     .fields = (VMStateField[]) {
315         VMSTATE_UINT32(len, SaveState),
316         VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
317         VMSTATE_END_OF_LIST()
318     },
319 };
320 
321 static void dump_vmstate_vmsd(FILE *out_file,
322                               const VMStateDescription *vmsd, int indent,
323                               bool is_subsection);
324 
325 static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
326                               int indent)
327 {
328     fprintf(out_file, "%*s{\n", indent, "");
329     indent += 2;
330     fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
331     fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
332             field->version_id);
333     fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
334             field->field_exists ? "true" : "false");
335     fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
336     if (field->vmsd != NULL) {
337         fprintf(out_file, ",\n");
338         dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
339     }
340     fprintf(out_file, "\n%*s}", indent - 2, "");
341 }
342 
343 static void dump_vmstate_vmss(FILE *out_file,
344                               const VMStateDescription **subsection,
345                               int indent)
346 {
347     if (*subsection != NULL) {
348         dump_vmstate_vmsd(out_file, *subsection, indent, true);
349     }
350 }
351 
352 static void dump_vmstate_vmsd(FILE *out_file,
353                               const VMStateDescription *vmsd, int indent,
354                               bool is_subsection)
355 {
356     if (is_subsection) {
357         fprintf(out_file, "%*s{\n", indent, "");
358     } else {
359         fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
360     }
361     indent += 2;
362     fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
363     fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
364             vmsd->version_id);
365     fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
366             vmsd->minimum_version_id);
367     if (vmsd->fields != NULL) {
368         const VMStateField *field = vmsd->fields;
369         bool first;
370 
371         fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
372         first = true;
373         while (field->name != NULL) {
374             if (field->flags & VMS_MUST_EXIST) {
375                 /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
376                 field++;
377                 continue;
378             }
379             if (!first) {
380                 fprintf(out_file, ",\n");
381             }
382             dump_vmstate_vmsf(out_file, field, indent + 2);
383             field++;
384             first = false;
385         }
386         fprintf(out_file, "\n%*s]", indent, "");
387     }
388     if (vmsd->subsections != NULL) {
389         const VMStateDescription **subsection = vmsd->subsections;
390         bool first;
391 
392         fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
393         first = true;
394         while (*subsection != NULL) {
395             if (!first) {
396                 fprintf(out_file, ",\n");
397             }
398             dump_vmstate_vmss(out_file, subsection, indent + 2);
399             subsection++;
400             first = false;
401         }
402         fprintf(out_file, "\n%*s]", indent, "");
403     }
404     fprintf(out_file, "\n%*s}", indent - 2, "");
405 }
406 
407 static void dump_machine_type(FILE *out_file)
408 {
409     MachineClass *mc;
410 
411     mc = MACHINE_GET_CLASS(current_machine);
412 
413     fprintf(out_file, "  \"vmschkmachine\": {\n");
414     fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
415     fprintf(out_file, "  },\n");
416 }
417 
418 void dump_vmstate_json_to_file(FILE *out_file)
419 {
420     GSList *list, *elt;
421     bool first;
422 
423     fprintf(out_file, "{\n");
424     dump_machine_type(out_file);
425 
426     first = true;
427     list = object_class_get_list(TYPE_DEVICE, true);
428     for (elt = list; elt; elt = elt->next) {
429         DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
430                                              TYPE_DEVICE);
431         const char *name;
432         int indent = 2;
433 
434         if (!dc->vmsd) {
435             continue;
436         }
437 
438         if (!first) {
439             fprintf(out_file, ",\n");
440         }
441         name = object_class_get_name(OBJECT_CLASS(dc));
442         fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
443         indent += 2;
444         fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
445         fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
446                 dc->vmsd->version_id);
447         fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
448                 dc->vmsd->minimum_version_id);
449 
450         dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);
451 
452         fprintf(out_file, "\n%*s}", indent - 2, "");
453         first = false;
454     }
455     fprintf(out_file, "\n}\n");
456     fclose(out_file);
457 }
458 
459 static int calculate_new_instance_id(const char *idstr)
460 {
461     SaveStateEntry *se;
462     int instance_id = 0;
463 
464     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
465         if (strcmp(idstr, se->idstr) == 0
466             && instance_id <= se->instance_id) {
467             instance_id = se->instance_id + 1;
468         }
469     }
470     return instance_id;
471 }
472 
473 static int calculate_compat_instance_id(const char *idstr)
474 {
475     SaveStateEntry *se;
476     int instance_id = 0;
477 
478     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
479         if (!se->compat) {
480             continue;
481         }
482 
483         if (strcmp(idstr, se->compat->idstr) == 0
484             && instance_id <= se->compat->instance_id) {
485             instance_id = se->compat->instance_id + 1;
486         }
487     }
488     return instance_id;
489 }
490 
491 /* TODO: Individual devices generally have very little idea about the rest
492    of the system, so instance_id should be removed/replaced.
493    Meanwhile pass -1 as instance_id if you do not already have a clearly
494    distinguishing id for all instances of your device class. */
495 int register_savevm_live(DeviceState *dev,
496                          const char *idstr,
497                          int instance_id,
498                          int version_id,
499                          SaveVMHandlers *ops,
500                          void *opaque)
501 {
502     SaveStateEntry *se;
503 
504     se = g_new0(SaveStateEntry, 1);
505     se->version_id = version_id;
506     se->section_id = savevm_state.global_section_id++;
507     se->ops = ops;
508     se->opaque = opaque;
509     se->vmsd = NULL;
510     /* if this is a live_savem then set is_ram */
511     if (ops->save_live_setup != NULL) {
512         se->is_ram = 1;
513     }
514 
515     if (dev) {
516         char *id = qdev_get_dev_path(dev);
517         if (id) {
518             pstrcpy(se->idstr, sizeof(se->idstr), id);
519             pstrcat(se->idstr, sizeof(se->idstr), "/");
520             g_free(id);
521 
522             se->compat = g_new0(CompatEntry, 1);
523             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr);
524             se->compat->instance_id = instance_id == -1 ?
525                          calculate_compat_instance_id(idstr) : instance_id;
526             instance_id = -1;
527         }
528     }
529     pstrcat(se->idstr, sizeof(se->idstr), idstr);
530 
531     if (instance_id == -1) {
532         se->instance_id = calculate_new_instance_id(se->idstr);
533     } else {
534         se->instance_id = instance_id;
535     }
536     assert(!se->compat || se->instance_id == 0);
537     /* add at the end of list */
538     QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
539     return 0;
540 }
541 
542 int register_savevm(DeviceState *dev,
543                     const char *idstr,
544                     int instance_id,
545                     int version_id,
546                     SaveStateHandler *save_state,
547                     LoadStateHandler *load_state,
548                     void *opaque)
549 {
550     SaveVMHandlers *ops = g_new0(SaveVMHandlers, 1);
551     ops->save_state = save_state;
552     ops->load_state = load_state;
553     return register_savevm_live(dev, idstr, instance_id, version_id,
554                                 ops, opaque);
555 }
556 
557 void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
558 {
559     SaveStateEntry *se, *new_se;
560     char id[256] = "";
561 
562     if (dev) {
563         char *path = qdev_get_dev_path(dev);
564         if (path) {
565             pstrcpy(id, sizeof(id), path);
566             pstrcat(id, sizeof(id), "/");
567             g_free(path);
568         }
569     }
570     pstrcat(id, sizeof(id), idstr);
571 
572     QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
573         if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
574             QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
575             g_free(se->compat);
576             g_free(se->ops);
577             g_free(se);
578         }
579     }
580 }
581 
582 int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
583                                    const VMStateDescription *vmsd,
584                                    void *opaque, int alias_id,
585                                    int required_for_version)
586 {
587     SaveStateEntry *se;
588 
589     /* If this triggers, alias support can be dropped for the vmsd. */
590     assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
591 
592     se = g_new0(SaveStateEntry, 1);
593     se->version_id = vmsd->version_id;
594     se->section_id = savevm_state.global_section_id++;
595     se->opaque = opaque;
596     se->vmsd = vmsd;
597     se->alias_id = alias_id;
598 
599     if (dev) {
600         char *id = qdev_get_dev_path(dev);
601         if (id) {
602             pstrcpy(se->idstr, sizeof(se->idstr), id);
603             pstrcat(se->idstr, sizeof(se->idstr), "/");
604             g_free(id);
605 
606             se->compat = g_new0(CompatEntry, 1);
607             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
608             se->compat->instance_id = instance_id == -1 ?
609                          calculate_compat_instance_id(vmsd->name) : instance_id;
610             instance_id = -1;
611         }
612     }
613     pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
614 
615     if (instance_id == -1) {
616         se->instance_id = calculate_new_instance_id(se->idstr);
617     } else {
618         se->instance_id = instance_id;
619     }
620     assert(!se->compat || se->instance_id == 0);
621     /* add at the end of list */
622     QTAILQ_INSERT_TAIL(&savevm_state.handlers, se, entry);
623     return 0;
624 }
625 
626 void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
627                         void *opaque)
628 {
629     SaveStateEntry *se, *new_se;
630 
631     QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
632         if (se->vmsd == vmsd && se->opaque == opaque) {
633             QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
634             g_free(se->compat);
635             g_free(se);
636         }
637     }
638 }
639 
640 static int vmstate_load(QEMUFile *f, SaveStateEntry *se, int version_id)
641 {
642     trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
643     if (!se->vmsd) {         /* Old style */
644         return se->ops->load_state(f, se->opaque, version_id);
645     }
646     return vmstate_load_state(f, se->vmsd, se->opaque, version_id);
647 }
648 
649 static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
650 {
651     int64_t old_offset, size;
652 
653     old_offset = qemu_ftell_fast(f);
654     se->ops->save_state(f, se->opaque);
655     size = qemu_ftell_fast(f) - old_offset;
656 
657     if (vmdesc) {
658         json_prop_int(vmdesc, "size", size);
659         json_start_array(vmdesc, "fields");
660         json_start_object(vmdesc, NULL);
661         json_prop_str(vmdesc, "name", "data");
662         json_prop_int(vmdesc, "size", size);
663         json_prop_str(vmdesc, "type", "buffer");
664         json_end_object(vmdesc);
665         json_end_array(vmdesc);
666     }
667 }
668 
669 static void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
670 {
671     trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
672     if (!se->vmsd) {
673         vmstate_save_old_style(f, se, vmdesc);
674         return;
675     }
676     vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
677 }
678 
679 void savevm_skip_section_footers(void)
680 {
681     skip_section_footers = true;
682 }
683 
684 /*
685  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
686  */
687 static void save_section_header(QEMUFile *f, SaveStateEntry *se,
688                                 uint8_t section_type)
689 {
690     qemu_put_byte(f, section_type);
691     qemu_put_be32(f, se->section_id);
692 
693     if (section_type == QEMU_VM_SECTION_FULL ||
694         section_type == QEMU_VM_SECTION_START) {
695         /* ID string */
696         size_t len = strlen(se->idstr);
697         qemu_put_byte(f, len);
698         qemu_put_buffer(f, (uint8_t *)se->idstr, len);
699 
700         qemu_put_be32(f, se->instance_id);
701         qemu_put_be32(f, se->version_id);
702     }
703 }
704 
705 /*
706  * Write a footer onto device sections that catches cases misformatted device
707  * sections.
708  */
709 static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
710 {
711     if (!skip_section_footers) {
712         qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
713         qemu_put_be32(f, se->section_id);
714     }
715 }
716 
717 /**
718  * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
719  *                           command and associated data.
720  *
721  * @f: File to send command on
722  * @command: Command type to send
723  * @len: Length of associated data
724  * @data: Data associated with command.
725  */
726 void qemu_savevm_command_send(QEMUFile *f,
727                               enum qemu_vm_cmd command,
728                               uint16_t len,
729                               uint8_t *data)
730 {
731     trace_savevm_command_send(command, len);
732     qemu_put_byte(f, QEMU_VM_COMMAND);
733     qemu_put_be16(f, (uint16_t)command);
734     qemu_put_be16(f, len);
735     qemu_put_buffer(f, data, len);
736     qemu_fflush(f);
737 }
738 
739 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
740 {
741     uint32_t buf;
742 
743     trace_savevm_send_ping(value);
744     buf = cpu_to_be32(value);
745     qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
746 }
747 
748 void qemu_savevm_send_open_return_path(QEMUFile *f)
749 {
750     trace_savevm_send_open_return_path();
751     qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
752 }
753 
754 /* We have a buffer of data to send; we don't want that all to be loaded
755  * by the command itself, so the command contains just the length of the
756  * extra buffer that we then send straight after it.
757  * TODO: Must be a better way to organise that
758  *
759  * Returns:
760  *    0 on success
761  *    -ve on error
762  */
763 int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
764 {
765     size_t cur_iov;
766     size_t len = qsb_get_length(qsb);
767     uint32_t tmp;
768 
769     if (len > MAX_VM_CMD_PACKAGED_SIZE) {
770         error_report("%s: Unreasonably large packaged state: %zu",
771                      __func__, len);
772         return -1;
773     }
774 
775     tmp = cpu_to_be32(len);
776 
777     trace_qemu_savevm_send_packaged();
778     qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
779 
780     /* all the data follows (concatinating the iov's) */
781     for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
782         /* The iov entries are partially filled */
783         size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
784         len -= towrite;
785 
786         if (!towrite) {
787             break;
788         }
789 
790         qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
791     }
792 
793     return 0;
794 }
795 
796 /* Send prior to any postcopy transfer */
797 void qemu_savevm_send_postcopy_advise(QEMUFile *f)
798 {
799     uint64_t tmp[2];
800     tmp[0] = cpu_to_be64(getpagesize());
801     tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
802 
803     trace_qemu_savevm_send_postcopy_advise();
804     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
805 }
806 
807 /* Sent prior to starting the destination running in postcopy, discard pages
808  * that have already been sent but redirtied on the source.
809  * CMD_POSTCOPY_RAM_DISCARD consist of:
810  *      byte   version (0)
811  *      byte   Length of name field (not including 0)
812  *  n x byte   RAM block name
813  *      byte   0 terminator (just for safety)
814  *  n x        Byte ranges within the named RAMBlock
815  *      be64   Start of the range
816  *      be64   Length
817  *
818  *  name:  RAMBlock name that these entries are part of
819  *  len: Number of page entries
820  *  start_list: 'len' addresses
821  *  length_list: 'len' addresses
822  *
823  */
824 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
825                                            uint16_t len,
826                                            uint64_t *start_list,
827                                            uint64_t *length_list)
828 {
829     uint8_t *buf;
830     uint16_t tmplen;
831     uint16_t t;
832     size_t name_len = strlen(name);
833 
834     trace_qemu_savevm_send_postcopy_ram_discard(name, len);
835     assert(name_len < 256);
836     buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
837     buf[0] = postcopy_ram_discard_version;
838     buf[1] = name_len;
839     memcpy(buf + 2, name, name_len);
840     tmplen = 2 + name_len;
841     buf[tmplen++] = '\0';
842 
843     for (t = 0; t < len; t++) {
844         cpu_to_be64w((uint64_t *)(buf + tmplen), start_list[t]);
845         tmplen += 8;
846         cpu_to_be64w((uint64_t *)(buf + tmplen), length_list[t]);
847         tmplen += 8;
848     }
849     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
850     g_free(buf);
851 }
852 
853 /* Get the destination into a state where it can receive postcopy data. */
854 void qemu_savevm_send_postcopy_listen(QEMUFile *f)
855 {
856     trace_savevm_send_postcopy_listen();
857     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
858 }
859 
860 /* Kick the destination into running */
861 void qemu_savevm_send_postcopy_run(QEMUFile *f)
862 {
863     trace_savevm_send_postcopy_run();
864     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
865 }
866 
867 bool qemu_savevm_state_blocked(Error **errp)
868 {
869     SaveStateEntry *se;
870 
871     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
872         if (se->vmsd && se->vmsd->unmigratable) {
873             error_setg(errp, "State blocked by non-migratable device '%s'",
874                        se->idstr);
875             return true;
876         }
877     }
878     return false;
879 }
880 
881 void qemu_savevm_state_header(QEMUFile *f)
882 {
883     trace_savevm_state_header();
884     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
885     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
886 
887     if (!savevm_state.skip_configuration) {
888         qemu_put_byte(f, QEMU_VM_CONFIGURATION);
889         vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
890     }
891 
892 }
893 
894 void qemu_savevm_state_begin(QEMUFile *f,
895                              const MigrationParams *params)
896 {
897     SaveStateEntry *se;
898     int ret;
899 
900     trace_savevm_state_begin();
901     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
902         if (!se->ops || !se->ops->set_params) {
903             continue;
904         }
905         se->ops->set_params(params, se->opaque);
906     }
907 
908     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
909         if (!se->ops || !se->ops->save_live_setup) {
910             continue;
911         }
912         if (se->ops && se->ops->is_active) {
913             if (!se->ops->is_active(se->opaque)) {
914                 continue;
915             }
916         }
917         save_section_header(f, se, QEMU_VM_SECTION_START);
918 
919         ret = se->ops->save_live_setup(f, se->opaque);
920         save_section_footer(f, se);
921         if (ret < 0) {
922             qemu_file_set_error(f, ret);
923             break;
924         }
925     }
926 }
927 
928 /*
929  * this function has three return values:
930  *   negative: there was one error, and we have -errno.
931  *   0 : We haven't finished, caller have to go again
932  *   1 : We have finished, we can go to complete phase
933  */
934 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
935 {
936     SaveStateEntry *se;
937     int ret = 1;
938 
939     trace_savevm_state_iterate();
940     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
941         if (!se->ops || !se->ops->save_live_iterate) {
942             continue;
943         }
944         if (se->ops && se->ops->is_active) {
945             if (!se->ops->is_active(se->opaque)) {
946                 continue;
947             }
948         }
949         /*
950          * In the postcopy phase, any device that doesn't know how to
951          * do postcopy should have saved it's state in the _complete
952          * call that's already run, it might get confused if we call
953          * iterate afterwards.
954          */
955         if (postcopy && !se->ops->save_live_complete_postcopy) {
956             continue;
957         }
958         if (qemu_file_rate_limit(f)) {
959             return 0;
960         }
961         trace_savevm_section_start(se->idstr, se->section_id);
962 
963         save_section_header(f, se, QEMU_VM_SECTION_PART);
964 
965         ret = se->ops->save_live_iterate(f, se->opaque);
966         trace_savevm_section_end(se->idstr, se->section_id, ret);
967         save_section_footer(f, se);
968 
969         if (ret < 0) {
970             qemu_file_set_error(f, ret);
971         }
972         if (ret <= 0) {
973             /* Do not proceed to the next vmstate before this one reported
974                completion of the current stage. This serializes the migration
975                and reduces the probability that a faster changing state is
976                synchronized over and over again. */
977             break;
978         }
979     }
980     return ret;
981 }
982 
983 static bool should_send_vmdesc(void)
984 {
985     MachineState *machine = MACHINE(qdev_get_machine());
986     bool in_postcopy = migration_in_postcopy(migrate_get_current());
987     return !machine->suppress_vmdesc && !in_postcopy;
988 }
989 
990 /*
991  * Calls the save_live_complete_postcopy methods
992  * causing the last few pages to be sent immediately and doing any associated
993  * cleanup.
994  * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
995  * all the other devices, but that happens at the point we switch to postcopy.
996  */
997 void qemu_savevm_state_complete_postcopy(QEMUFile *f)
998 {
999     SaveStateEntry *se;
1000     int ret;
1001 
1002     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1003         if (!se->ops || !se->ops->save_live_complete_postcopy) {
1004             continue;
1005         }
1006         if (se->ops && se->ops->is_active) {
1007             if (!se->ops->is_active(se->opaque)) {
1008                 continue;
1009             }
1010         }
1011         trace_savevm_section_start(se->idstr, se->section_id);
1012         /* Section type */
1013         qemu_put_byte(f, QEMU_VM_SECTION_END);
1014         qemu_put_be32(f, se->section_id);
1015 
1016         ret = se->ops->save_live_complete_postcopy(f, se->opaque);
1017         trace_savevm_section_end(se->idstr, se->section_id, ret);
1018         save_section_footer(f, se);
1019         if (ret < 0) {
1020             qemu_file_set_error(f, ret);
1021             return;
1022         }
1023     }
1024 
1025     qemu_put_byte(f, QEMU_VM_EOF);
1026     qemu_fflush(f);
1027 }
1028 
1029 void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
1030 {
1031     QJSON *vmdesc;
1032     int vmdesc_len;
1033     SaveStateEntry *se;
1034     int ret;
1035     bool in_postcopy = migration_in_postcopy(migrate_get_current());
1036 
1037     trace_savevm_state_complete_precopy();
1038 
1039     cpu_synchronize_all_states();
1040 
1041     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1042         if (!se->ops ||
1043             (in_postcopy && se->ops->save_live_complete_postcopy) ||
1044             (in_postcopy && !iterable_only) ||
1045             !se->ops->save_live_complete_precopy) {
1046             continue;
1047         }
1048 
1049         if (se->ops && se->ops->is_active) {
1050             if (!se->ops->is_active(se->opaque)) {
1051                 continue;
1052             }
1053         }
1054         trace_savevm_section_start(se->idstr, se->section_id);
1055 
1056         save_section_header(f, se, QEMU_VM_SECTION_END);
1057 
1058         ret = se->ops->save_live_complete_precopy(f, se->opaque);
1059         trace_savevm_section_end(se->idstr, se->section_id, ret);
1060         save_section_footer(f, se);
1061         if (ret < 0) {
1062             qemu_file_set_error(f, ret);
1063             return;
1064         }
1065     }
1066 
1067     if (iterable_only) {
1068         return;
1069     }
1070 
1071     vmdesc = qjson_new();
1072     json_prop_int(vmdesc, "page_size", TARGET_PAGE_SIZE);
1073     json_start_array(vmdesc, "devices");
1074     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1075 
1076         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1077             continue;
1078         }
1079         if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1080             trace_savevm_section_skip(se->idstr, se->section_id);
1081             continue;
1082         }
1083 
1084         trace_savevm_section_start(se->idstr, se->section_id);
1085 
1086         json_start_object(vmdesc, NULL);
1087         json_prop_str(vmdesc, "name", se->idstr);
1088         json_prop_int(vmdesc, "instance_id", se->instance_id);
1089 
1090         save_section_header(f, se, QEMU_VM_SECTION_FULL);
1091 
1092         vmstate_save(f, se, vmdesc);
1093 
1094         json_end_object(vmdesc);
1095         trace_savevm_section_end(se->idstr, se->section_id, 0);
1096         save_section_footer(f, se);
1097     }
1098 
1099     if (!in_postcopy) {
1100         /* Postcopy stream will still be going */
1101         qemu_put_byte(f, QEMU_VM_EOF);
1102     }
1103 
1104     json_end_array(vmdesc);
1105     qjson_finish(vmdesc);
1106     vmdesc_len = strlen(qjson_get_str(vmdesc));
1107 
1108     if (should_send_vmdesc()) {
1109         qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
1110         qemu_put_be32(f, vmdesc_len);
1111         qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
1112     }
1113     object_unref(OBJECT(vmdesc));
1114 
1115     qemu_fflush(f);
1116 }
1117 
1118 /* Give an estimate of the amount left to be transferred,
1119  * the result is split into the amount for units that can and
1120  * for units that can't do postcopy.
1121  */
1122 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
1123                                uint64_t *res_non_postcopiable,
1124                                uint64_t *res_postcopiable)
1125 {
1126     SaveStateEntry *se;
1127 
1128     *res_non_postcopiable = 0;
1129     *res_postcopiable = 0;
1130 
1131 
1132     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1133         if (!se->ops || !se->ops->save_live_pending) {
1134             continue;
1135         }
1136         if (se->ops && se->ops->is_active) {
1137             if (!se->ops->is_active(se->opaque)) {
1138                 continue;
1139             }
1140         }
1141         se->ops->save_live_pending(f, se->opaque, max_size,
1142                                    res_non_postcopiable, res_postcopiable);
1143     }
1144 }
1145 
1146 void qemu_savevm_state_cleanup(void)
1147 {
1148     SaveStateEntry *se;
1149 
1150     trace_savevm_state_cleanup();
1151     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1152         if (se->ops && se->ops->cleanup) {
1153             se->ops->cleanup(se->opaque);
1154         }
1155     }
1156 }
1157 
1158 static int qemu_savevm_state(QEMUFile *f, Error **errp)
1159 {
1160     int ret;
1161     MigrationParams params = {
1162         .blk = 0,
1163         .shared = 0
1164     };
1165     MigrationState *ms = migrate_init(&params);
1166     ms->to_dst_file = f;
1167 
1168     if (qemu_savevm_state_blocked(errp)) {
1169         return -EINVAL;
1170     }
1171 
1172     qemu_mutex_unlock_iothread();
1173     qemu_savevm_state_header(f);
1174     qemu_savevm_state_begin(f, &params);
1175     qemu_mutex_lock_iothread();
1176 
1177     while (qemu_file_get_error(f) == 0) {
1178         if (qemu_savevm_state_iterate(f, false) > 0) {
1179             break;
1180         }
1181     }
1182 
1183     ret = qemu_file_get_error(f);
1184     if (ret == 0) {
1185         qemu_savevm_state_complete_precopy(f, false);
1186         ret = qemu_file_get_error(f);
1187     }
1188     qemu_savevm_state_cleanup();
1189     if (ret != 0) {
1190         error_setg_errno(errp, -ret, "Error while writing VM state");
1191     }
1192     return ret;
1193 }
1194 
1195 static int qemu_save_device_state(QEMUFile *f)
1196 {
1197     SaveStateEntry *se;
1198 
1199     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
1200     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
1201 
1202     cpu_synchronize_all_states();
1203 
1204     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1205         if (se->is_ram) {
1206             continue;
1207         }
1208         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
1209             continue;
1210         }
1211         if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
1212             continue;
1213         }
1214 
1215         save_section_header(f, se, QEMU_VM_SECTION_FULL);
1216 
1217         vmstate_save(f, se, NULL);
1218 
1219         save_section_footer(f, se);
1220     }
1221 
1222     qemu_put_byte(f, QEMU_VM_EOF);
1223 
1224     return qemu_file_get_error(f);
1225 }
1226 
1227 static SaveStateEntry *find_se(const char *idstr, int instance_id)
1228 {
1229     SaveStateEntry *se;
1230 
1231     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
1232         if (!strcmp(se->idstr, idstr) &&
1233             (instance_id == se->instance_id ||
1234              instance_id == se->alias_id))
1235             return se;
1236         /* Migrating from an older version? */
1237         if (strstr(se->idstr, idstr) && se->compat) {
1238             if (!strcmp(se->compat->idstr, idstr) &&
1239                 (instance_id == se->compat->instance_id ||
1240                  instance_id == se->alias_id))
1241                 return se;
1242         }
1243     }
1244     return NULL;
1245 }
1246 
/* Positive return codes used by the loadvm command handlers. */
enum LoadVMExitCodes {
    /* Lets a command unwind every layer of nested loadvm loops */
    LOADVM_QUIT = 1
};
1251 
1252 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
1253 
1254 /* ------ incoming postcopy messages ------ */
1255 /* 'advise' arrives before any transfers just to tell us that a postcopy
1256  * *might* happen - it might be skipped if precopy transferred everything
1257  * quickly.
1258  */
1259 static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
1260 {
1261     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1262     uint64_t remote_hps, remote_tps;
1263 
1264     trace_loadvm_postcopy_handle_advise();
1265     if (ps != POSTCOPY_INCOMING_NONE) {
1266         error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
1267         return -1;
1268     }
1269 
1270     if (!postcopy_ram_supported_by_host()) {
1271         return -1;
1272     }
1273 
1274     remote_hps = qemu_get_be64(mis->from_src_file);
1275     if (remote_hps != getpagesize())  {
1276         /*
1277          * Some combinations of mismatch are probably possible but it gets
1278          * a bit more complicated.  In particular we need to place whole
1279          * host pages on the dest at once, and we need to ensure that we
1280          * handle dirtying to make sure we never end up sending part of
1281          * a hostpage on it's own.
1282          */
1283         error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
1284                      (int)remote_hps, getpagesize());
1285         return -1;
1286     }
1287 
1288     remote_tps = qemu_get_be64(mis->from_src_file);
1289     if (remote_tps != (1ul << qemu_target_page_bits())) {
1290         /*
1291          * Again, some differences could be dealt with, but for now keep it
1292          * simple.
1293          */
1294         error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
1295                      (int)remote_tps, 1 << qemu_target_page_bits());
1296         return -1;
1297     }
1298 
1299     if (ram_postcopy_incoming_init(mis)) {
1300         return -1;
1301     }
1302 
1303     postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
1304 
1305     return 0;
1306 }
1307 
1308 /* After postcopy we will be told to throw some pages away since they're
1309  * dirty and will have to be demand fetched.  Must happen before CPU is
1310  * started.
1311  * There can be 0..many of these messages, each encoding multiple pages.
1312  */
1313 static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
1314                                               uint16_t len)
1315 {
1316     int tmp;
1317     char ramid[256];
1318     PostcopyState ps = postcopy_state_get();
1319 
1320     trace_loadvm_postcopy_ram_handle_discard();
1321 
1322     switch (ps) {
1323     case POSTCOPY_INCOMING_ADVISE:
1324         /* 1st discard */
1325         tmp = postcopy_ram_prepare_discard(mis);
1326         if (tmp) {
1327             return tmp;
1328         }
1329         break;
1330 
1331     case POSTCOPY_INCOMING_DISCARD:
1332         /* Expected state */
1333         break;
1334 
1335     default:
1336         error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
1337                      ps);
1338         return -1;
1339     }
1340     /* We're expecting a
1341      *    Version (0)
1342      *    a RAM ID string (length byte, name, 0 term)
1343      *    then at least 1 16 byte chunk
1344     */
1345     if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
1346         error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1347         return -1;
1348     }
1349 
1350     tmp = qemu_get_byte(mis->from_src_file);
1351     if (tmp != postcopy_ram_discard_version) {
1352         error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
1353         return -1;
1354     }
1355 
1356     if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
1357         error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
1358         return -1;
1359     }
1360     tmp = qemu_get_byte(mis->from_src_file);
1361     if (tmp != 0) {
1362         error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
1363         return -1;
1364     }
1365 
1366     len -= 3 + strlen(ramid);
1367     if (len % 16) {
1368         error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
1369         return -1;
1370     }
1371     trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
1372     while (len) {
1373         uint64_t start_addr, block_length;
1374         start_addr = qemu_get_be64(mis->from_src_file);
1375         block_length = qemu_get_be64(mis->from_src_file);
1376 
1377         len -= 16;
1378         int ret = ram_discard_range(mis, ramid, start_addr,
1379                                     block_length);
1380         if (ret) {
1381             return ret;
1382         }
1383     }
1384     trace_loadvm_postcopy_ram_handle_discard_end();
1385 
1386     return 0;
1387 }
1388 
1389 /*
1390  * Triggered by a postcopy_listen command; this thread takes over reading
1391  * the input stream, leaving the main thread free to carry on loading the rest
1392  * of the device state (from RAM).
1393  * (TODO:This could do with being in a postcopy file - but there again it's
1394  * just another input loop, not that postcopy specific)
1395  */
1396 static void *postcopy_ram_listen_thread(void *opaque)
1397 {
1398     QEMUFile *f = opaque;
1399     MigrationIncomingState *mis = migration_incoming_get_current();
1400     int load_res;
1401 
1402     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
1403                                    MIGRATION_STATUS_POSTCOPY_ACTIVE);
1404     qemu_sem_post(&mis->listen_thread_sem);
1405     trace_postcopy_ram_listen_thread_start();
1406 
1407     /*
1408      * Because we're a thread and not a coroutine we can't yield
1409      * in qemu_file, and thus we must be blocking now.
1410      */
1411     qemu_file_set_blocking(f, true);
1412     load_res = qemu_loadvm_state_main(f, mis);
1413     /* And non-blocking again so we don't block in any cleanup */
1414     qemu_file_set_blocking(f, false);
1415 
1416     trace_postcopy_ram_listen_thread_exit();
1417     if (load_res < 0) {
1418         error_report("%s: loadvm failed: %d", __func__, load_res);
1419         qemu_file_set_error(f, load_res);
1420         migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1421                                        MIGRATION_STATUS_FAILED);
1422     } else {
1423         /*
1424          * This looks good, but it's possible that the device loading in the
1425          * main thread hasn't finished yet, and so we might not be in 'RUN'
1426          * state yet; wait for the end of the main thread.
1427          */
1428         qemu_event_wait(&mis->main_thread_load_event);
1429     }
1430     postcopy_ram_incoming_cleanup(mis);
1431 
1432     if (load_res < 0) {
1433         /*
1434          * If something went wrong then we have a bad state so exit;
1435          * depending how far we got it might be possible at this point
1436          * to leave the guest running and fire MCEs for pages that never
1437          * arrived as a desperate recovery step.
1438          */
1439         exit(EXIT_FAILURE);
1440     }
1441 
1442     migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1443                                    MIGRATION_STATUS_COMPLETED);
1444     /*
1445      * If everything has worked fine, then the main thread has waited
1446      * for us to start, and we're the last use of the mis.
1447      * (If something broke then qemu will have to exit anyway since it's
1448      * got a bad migration state).
1449      */
1450     migration_incoming_state_destroy();
1451 
1452 
1453     return NULL;
1454 }
1455 
1456 /* After this message we must be able to immediately receive postcopy data */
1457 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
1458 {
1459     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
1460     trace_loadvm_postcopy_handle_listen();
1461     if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
1462         error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
1463         return -1;
1464     }
1465     if (ps == POSTCOPY_INCOMING_ADVISE) {
1466         /*
1467          * A rare case, we entered listen without having to do any discards,
1468          * so do the setup that's normally done at the time of the 1st discard.
1469          */
1470         postcopy_ram_prepare_discard(mis);
1471     }
1472 
1473     /*
1474      * Sensitise RAM - can now generate requests for blocks that don't exist
1475      * However, at this point the CPU shouldn't be running, and the IO
1476      * shouldn't be doing anything yet so don't actually expect requests
1477      */
1478     if (postcopy_ram_enable_notify(mis)) {
1479         return -1;
1480     }
1481 
1482     if (mis->have_listen_thread) {
1483         error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
1484         return -1;
1485     }
1486 
1487     mis->have_listen_thread = true;
1488     /* Start up the listening thread and wait for it to signal ready */
1489     qemu_sem_init(&mis->listen_thread_sem, 0);
1490     qemu_thread_create(&mis->listen_thread, "postcopy/listen",
1491                        postcopy_ram_listen_thread, mis->from_src_file,
1492                        QEMU_THREAD_JOINABLE);
1493     qemu_sem_wait(&mis->listen_thread_sem);
1494     qemu_sem_destroy(&mis->listen_thread_sem);
1495 
1496     return 0;
1497 }
1498 
1499 /* After all discards we can start running and asking for pages */
1500 static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
1501 {
1502     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
1503     Error *local_err = NULL;
1504 
1505     trace_loadvm_postcopy_handle_run();
1506     if (ps != POSTCOPY_INCOMING_LISTENING) {
1507         error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
1508         return -1;
1509     }
1510 
1511     /* TODO we should move all of this lot into postcopy_ram.c or a shared code
1512      * in migration.c
1513      */
1514     cpu_synchronize_all_post_init();
1515 
1516     qemu_announce_self();
1517 
1518     /* Make sure all file formats flush their mutable metadata */
1519     bdrv_invalidate_cache_all(&local_err);
1520     if (local_err) {
1521         error_report_err(local_err);
1522         return -1;
1523     }
1524 
1525     trace_loadvm_postcopy_handle_run_cpu_sync();
1526     cpu_synchronize_all_post_init();
1527 
1528     trace_loadvm_postcopy_handle_run_vmstart();
1529 
1530     if (autostart) {
1531         /* Hold onto your hats, starting the CPU */
1532         vm_start();
1533     } else {
1534         /* leave it paused and let management decide when to start the CPU */
1535         runstate_set(RUN_STATE_PAUSED);
1536     }
1537 
1538     /* We need to finish reading the stream from the package
1539      * and also stop reading anything more from the stream that loaded the
1540      * package (since it's now being read by the listener thread).
1541      * LOADVM_QUIT will quit all the layers of nested loadvm loops.
1542      */
1543     return LOADVM_QUIT;
1544 }
1545 
1546 /**
1547  * Immediately following this command is a blob of data containing an embedded
1548  * chunk of migration stream; read it and load it.
1549  *
1550  * @mis: Incoming state
1551  * @length: Length of packaged data to read
1552  *
1553  * Returns: Negative values on error
1554  *
1555  */
1556 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
1557 {
1558     int ret;
1559     uint8_t *buffer;
1560     uint32_t length;
1561     QEMUSizedBuffer *qsb;
1562 
1563     length = qemu_get_be32(mis->from_src_file);
1564     trace_loadvm_handle_cmd_packaged(length);
1565 
1566     if (length > MAX_VM_CMD_PACKAGED_SIZE) {
1567         error_report("Unreasonably large packaged state: %u", length);
1568         return -1;
1569     }
1570     buffer = g_malloc0(length);
1571     ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length);
1572     if (ret != length) {
1573         g_free(buffer);
1574         error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d",
1575                      ret, length);
1576         return (ret < 0) ? ret : -EAGAIN;
1577     }
1578     trace_loadvm_handle_cmd_packaged_received(ret);
1579 
1580     /* Setup a dummy QEMUFile that actually reads from the buffer */
1581     qsb = qsb_create(buffer, length);
1582     g_free(buffer); /* Because qsb_create copies */
1583     if (!qsb) {
1584         error_report("Unable to create qsb");
1585     }
1586     QEMUFile *packf = qemu_bufopen("r", qsb);
1587 
1588     ret = qemu_loadvm_state_main(packf, mis);
1589     trace_loadvm_handle_cmd_packaged_main(ret);
1590     qemu_fclose(packf);
1591     qsb_free(qsb);
1592 
1593     return ret;
1594 }
1595 
1596 /*
1597  * Process an incoming 'QEMU_VM_COMMAND'
1598  * 0           just a normal return
1599  * LOADVM_QUIT All good, but exit the loop
1600  * <0          Error
1601  */
1602 static int loadvm_process_command(QEMUFile *f)
1603 {
1604     MigrationIncomingState *mis = migration_incoming_get_current();
1605     uint16_t cmd;
1606     uint16_t len;
1607     uint32_t tmp32;
1608 
1609     cmd = qemu_get_be16(f);
1610     len = qemu_get_be16(f);
1611 
1612     trace_loadvm_process_command(cmd, len);
1613     if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
1614         error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
1615         return -EINVAL;
1616     }
1617 
1618     if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
1619         error_report("%s received with bad length - expecting %zu, got %d",
1620                      mig_cmd_args[cmd].name,
1621                      (size_t)mig_cmd_args[cmd].len, len);
1622         return -ERANGE;
1623     }
1624 
1625     switch (cmd) {
1626     case MIG_CMD_OPEN_RETURN_PATH:
1627         if (mis->to_src_file) {
1628             error_report("CMD_OPEN_RETURN_PATH called when RP already open");
1629             /* Not really a problem, so don't give up */
1630             return 0;
1631         }
1632         mis->to_src_file = qemu_file_get_return_path(f);
1633         if (!mis->to_src_file) {
1634             error_report("CMD_OPEN_RETURN_PATH failed");
1635             return -1;
1636         }
1637         break;
1638 
1639     case MIG_CMD_PING:
1640         tmp32 = qemu_get_be32(f);
1641         trace_loadvm_process_command_ping(tmp32);
1642         if (!mis->to_src_file) {
1643             error_report("CMD_PING (0x%x) received with no return path",
1644                          tmp32);
1645             return -1;
1646         }
1647         migrate_send_rp_pong(mis, tmp32);
1648         break;
1649 
1650     case MIG_CMD_PACKAGED:
1651         return loadvm_handle_cmd_packaged(mis);
1652 
1653     case MIG_CMD_POSTCOPY_ADVISE:
1654         return loadvm_postcopy_handle_advise(mis);
1655 
1656     case MIG_CMD_POSTCOPY_LISTEN:
1657         return loadvm_postcopy_handle_listen(mis);
1658 
1659     case MIG_CMD_POSTCOPY_RUN:
1660         return loadvm_postcopy_handle_run(mis);
1661 
1662     case MIG_CMD_POSTCOPY_RAM_DISCARD:
1663         return loadvm_postcopy_ram_handle_discard(mis, len);
1664     }
1665 
1666     return 0;
1667 }
1668 
1669 struct LoadStateEntry {
1670     QLIST_ENTRY(LoadStateEntry) entry;
1671     SaveStateEntry *se;
1672     int section_id;
1673     int version_id;
1674 };
1675 
1676 /*
1677  * Read a footer off the wire and check that it matches the expected section
1678  *
1679  * Returns: true if the footer was good
1680  *          false if there is a problem (and calls error_report to say why)
1681  */
1682 static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
1683 {
1684     uint8_t read_mark;
1685     uint32_t read_section_id;
1686 
1687     if (skip_section_footers) {
1688         /* No footer to check */
1689         return true;
1690     }
1691 
1692     read_mark = qemu_get_byte(f);
1693 
1694     if (read_mark != QEMU_VM_SECTION_FOOTER) {
1695         error_report("Missing section footer for %s", le->se->idstr);
1696         return false;
1697     }
1698 
1699     read_section_id = qemu_get_be32(f);
1700     if (read_section_id != le->section_id) {
1701         error_report("Mismatched section id in footer for %s -"
1702                      " read 0x%x expected 0x%x",
1703                      le->se->idstr, read_section_id, le->section_id);
1704         return false;
1705     }
1706 
1707     /* All good */
1708     return true;
1709 }
1710 
1711 void loadvm_free_handlers(MigrationIncomingState *mis)
1712 {
1713     LoadStateEntry *le, *new_le;
1714 
1715     QLIST_FOREACH_SAFE(le, &mis->loadvm_handlers, entry, new_le) {
1716         QLIST_REMOVE(le, entry);
1717         g_free(le);
1718     }
1719 }
1720 
1721 static int
1722 qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
1723 {
1724     uint32_t instance_id, version_id, section_id;
1725     SaveStateEntry *se;
1726     LoadStateEntry *le;
1727     char idstr[256];
1728     int ret;
1729 
1730     /* Read section start */
1731     section_id = qemu_get_be32(f);
1732     if (!qemu_get_counted_string(f, idstr)) {
1733         error_report("Unable to read ID string for section %u",
1734                      section_id);
1735         return -EINVAL;
1736     }
1737     instance_id = qemu_get_be32(f);
1738     version_id = qemu_get_be32(f);
1739 
1740     trace_qemu_loadvm_state_section_startfull(section_id, idstr,
1741             instance_id, version_id);
1742     /* Find savevm section */
1743     se = find_se(idstr, instance_id);
1744     if (se == NULL) {
1745         error_report("Unknown savevm section or instance '%s' %d",
1746                      idstr, instance_id);
1747         return -EINVAL;
1748     }
1749 
1750     /* Validate version */
1751     if (version_id > se->version_id) {
1752         error_report("savevm: unsupported version %d for '%s' v%d",
1753                      version_id, idstr, se->version_id);
1754         return -EINVAL;
1755     }
1756 
1757     /* Add entry */
1758     le = g_malloc0(sizeof(*le));
1759 
1760     le->se = se;
1761     le->section_id = section_id;
1762     le->version_id = version_id;
1763     QLIST_INSERT_HEAD(&mis->loadvm_handlers, le, entry);
1764 
1765     ret = vmstate_load(f, le->se, le->version_id);
1766     if (ret < 0) {
1767         error_report("error while loading state for instance 0x%x of"
1768                      " device '%s'", instance_id, idstr);
1769         return ret;
1770     }
1771     if (!check_section_footer(f, le)) {
1772         return -EINVAL;
1773     }
1774 
1775     return 0;
1776 }
1777 
1778 static int
1779 qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
1780 {
1781     uint32_t section_id;
1782     LoadStateEntry *le;
1783     int ret;
1784 
1785     section_id = qemu_get_be32(f);
1786 
1787     trace_qemu_loadvm_state_section_partend(section_id);
1788     QLIST_FOREACH(le, &mis->loadvm_handlers, entry) {
1789         if (le->section_id == section_id) {
1790             break;
1791         }
1792     }
1793     if (le == NULL) {
1794         error_report("Unknown savevm section %d", section_id);
1795         return -EINVAL;
1796     }
1797 
1798     ret = vmstate_load(f, le->se, le->version_id);
1799     if (ret < 0) {
1800         error_report("error while loading state section id %d(%s)",
1801                      section_id, le->se->idstr);
1802         return ret;
1803     }
1804     if (!check_section_footer(f, le)) {
1805         return -EINVAL;
1806     }
1807 
1808     return 0;
1809 }
1810 
1811 static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
1812 {
1813     uint8_t section_type;
1814     int ret;
1815 
1816     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
1817 
1818         trace_qemu_loadvm_state_section(section_type);
1819         switch (section_type) {
1820         case QEMU_VM_SECTION_START:
1821         case QEMU_VM_SECTION_FULL:
1822             ret = qemu_loadvm_section_start_full(f, mis);
1823             if (ret < 0) {
1824                 return ret;
1825             }
1826             break;
1827         case QEMU_VM_SECTION_PART:
1828         case QEMU_VM_SECTION_END:
1829             ret = qemu_loadvm_section_part_end(f, mis);
1830             if (ret < 0) {
1831                 return ret;
1832             }
1833             break;
1834         case QEMU_VM_COMMAND:
1835             ret = loadvm_process_command(f);
1836             trace_qemu_loadvm_state_section_command(ret);
1837             if ((ret < 0) || (ret & LOADVM_QUIT)) {
1838                 return ret;
1839             }
1840             break;
1841         default:
1842             error_report("Unknown savevm section type %d", section_type);
1843             return -EINVAL;
1844         }
1845     }
1846 
1847     return 0;
1848 }
1849 
1850 int qemu_loadvm_state(QEMUFile *f)
1851 {
1852     MigrationIncomingState *mis = migration_incoming_get_current();
1853     Error *local_err = NULL;
1854     unsigned int v;
1855     int ret;
1856 
1857     if (qemu_savevm_state_blocked(&local_err)) {
1858         error_report_err(local_err);
1859         return -EINVAL;
1860     }
1861 
1862     v = qemu_get_be32(f);
1863     if (v != QEMU_VM_FILE_MAGIC) {
1864         error_report("Not a migration stream");
1865         return -EINVAL;
1866     }
1867 
1868     v = qemu_get_be32(f);
1869     if (v == QEMU_VM_FILE_VERSION_COMPAT) {
1870         error_report("SaveVM v2 format is obsolete and don't work anymore");
1871         return -ENOTSUP;
1872     }
1873     if (v != QEMU_VM_FILE_VERSION) {
1874         error_report("Unsupported migration stream version");
1875         return -ENOTSUP;
1876     }
1877 
1878     if (!savevm_state.skip_configuration) {
1879         if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
1880             error_report("Configuration section missing");
1881             return -EINVAL;
1882         }
1883         ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
1884 
1885         if (ret) {
1886             return ret;
1887         }
1888     }
1889 
1890     ret = qemu_loadvm_state_main(f, mis);
1891     qemu_event_set(&mis->main_thread_load_event);
1892 
1893     trace_qemu_loadvm_state_post_main(ret);
1894 
1895     if (mis->have_listen_thread) {
1896         /* Listen thread still going, can't clean up yet */
1897         return ret;
1898     }
1899 
1900     if (ret == 0) {
1901         ret = qemu_file_get_error(f);
1902     }
1903 
1904     /*
1905      * Try to read in the VMDESC section as well, so that dumping tools that
1906      * intercept our migration stream have the chance to see it.
1907      */
1908 
1909     /* We've got to be careful; if we don't read the data and just shut the fd
1910      * then the sender can error if we close while it's still sending.
1911      * We also mustn't read data that isn't there; some transports (RDMA)
1912      * will stall waiting for that data when the source has already closed.
1913      */
1914     if (ret == 0 && should_send_vmdesc()) {
1915         uint8_t *buf;
1916         uint32_t size;
1917         uint8_t  section_type = qemu_get_byte(f);
1918 
1919         if (section_type != QEMU_VM_VMDESCRIPTION) {
1920             error_report("Expected vmdescription section, but got %d",
1921                          section_type);
1922             /*
1923              * It doesn't seem worth failing at this point since
1924              * we apparently have an otherwise valid VM state
1925              */
1926         } else {
1927             buf = g_malloc(0x1000);
1928             size = qemu_get_be32(f);
1929 
1930             while (size > 0) {
1931                 uint32_t read_chunk = MIN(size, 0x1000);
1932                 qemu_get_buffer(f, buf, read_chunk);
1933                 size -= read_chunk;
1934             }
1935             g_free(buf);
1936         }
1937     }
1938 
1939     cpu_synchronize_all_post_init();
1940 
1941     return ret;
1942 }
1943 
1944 void hmp_savevm(Monitor *mon, const QDict *qdict)
1945 {
1946     BlockDriverState *bs, *bs1;
1947     QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
1948     int ret;
1949     QEMUFile *f;
1950     int saved_vm_running;
1951     uint64_t vm_state_size;
1952     qemu_timeval tv;
1953     struct tm tm;
1954     const char *name = qdict_get_try_str(qdict, "name");
1955     Error *local_err = NULL;
1956     AioContext *aio_context;
1957 
1958     if (!bdrv_all_can_snapshot(&bs)) {
1959         monitor_printf(mon, "Device '%s' is writable but does not "
1960                        "support snapshots.\n", bdrv_get_device_name(bs));
1961         return;
1962     }
1963 
1964     /* Delete old snapshots of the same name */
1965     if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) {
1966         error_reportf_err(local_err,
1967                           "Error while deleting snapshot on device '%s': ",
1968                           bdrv_get_device_name(bs1));
1969         return;
1970     }
1971 
1972     bs = bdrv_all_find_vmstate_bs();
1973     if (bs == NULL) {
1974         monitor_printf(mon, "No block device can accept snapshots\n");
1975         return;
1976     }
1977     aio_context = bdrv_get_aio_context(bs);
1978 
1979     saved_vm_running = runstate_is_running();
1980 
1981     ret = global_state_store();
1982     if (ret) {
1983         monitor_printf(mon, "Error saving global state\n");
1984         return;
1985     }
1986     vm_stop(RUN_STATE_SAVE_VM);
1987 
1988     aio_context_acquire(aio_context);
1989 
1990     memset(sn, 0, sizeof(*sn));
1991 
1992     /* fill auxiliary fields */
1993     qemu_gettimeofday(&tv);
1994     sn->date_sec = tv.tv_sec;
1995     sn->date_nsec = tv.tv_usec * 1000;
1996     sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1997 
1998     if (name) {
1999         ret = bdrv_snapshot_find(bs, old_sn, name);
2000         if (ret >= 0) {
2001             pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
2002             pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
2003         } else {
2004             pstrcpy(sn->name, sizeof(sn->name), name);
2005         }
2006     } else {
2007         /* cast below needed for OpenBSD where tv_sec is still 'long' */
2008         localtime_r((const time_t *)&tv.tv_sec, &tm);
2009         strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
2010     }
2011 
2012     /* save the VM state */
2013     f = qemu_fopen_bdrv(bs, 1);
2014     if (!f) {
2015         monitor_printf(mon, "Could not open VM state file\n");
2016         goto the_end;
2017     }
2018     ret = qemu_savevm_state(f, &local_err);
2019     vm_state_size = qemu_ftell(f);
2020     qemu_fclose(f);
2021     if (ret < 0) {
2022         error_report_err(local_err);
2023         goto the_end;
2024     }
2025 
2026     ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
2027     if (ret < 0) {
2028         monitor_printf(mon, "Error while creating snapshot on '%s'\n",
2029                        bdrv_get_device_name(bs));
2030     }
2031 
2032  the_end:
2033     aio_context_release(aio_context);
2034     if (saved_vm_running) {
2035         vm_start();
2036     }
2037 }
2038 
2039 void qmp_xen_save_devices_state(const char *filename, Error **errp)
2040 {
2041     QEMUFile *f;
2042     int saved_vm_running;
2043     int ret;
2044 
2045     saved_vm_running = runstate_is_running();
2046     vm_stop(RUN_STATE_SAVE_VM);
2047     global_state_store_running();
2048 
2049     f = qemu_fopen(filename, "wb");
2050     if (!f) {
2051         error_setg_file_open(errp, errno, filename);
2052         goto the_end;
2053     }
2054     ret = qemu_save_device_state(f);
2055     qemu_fclose(f);
2056     if (ret < 0) {
2057         error_setg(errp, QERR_IO_ERROR);
2058     }
2059 
2060  the_end:
2061     if (saved_vm_running) {
2062         vm_start();
2063     }
2064 }
2065 
2066 int load_vmstate(const char *name)
2067 {
2068     BlockDriverState *bs, *bs_vm_state;
2069     QEMUSnapshotInfo sn;
2070     QEMUFile *f;
2071     int ret;
2072     AioContext *aio_context;
2073 
2074     if (!bdrv_all_can_snapshot(&bs)) {
2075         error_report("Device '%s' is writable but does not support snapshots.",
2076                      bdrv_get_device_name(bs));
2077         return -ENOTSUP;
2078     }
2079     ret = bdrv_all_find_snapshot(name, &bs);
2080     if (ret < 0) {
2081         error_report("Device '%s' does not have the requested snapshot '%s'",
2082                      bdrv_get_device_name(bs), name);
2083         return ret;
2084     }
2085 
2086     bs_vm_state = bdrv_all_find_vmstate_bs();
2087     if (!bs_vm_state) {
2088         error_report("No block device supports snapshots");
2089         return -ENOTSUP;
2090     }
2091     aio_context = bdrv_get_aio_context(bs_vm_state);
2092 
2093     /* Don't even try to load empty VM states */
2094     aio_context_acquire(aio_context);
2095     ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
2096     aio_context_release(aio_context);
2097     if (ret < 0) {
2098         return ret;
2099     } else if (sn.vm_state_size == 0) {
2100         error_report("This is a disk-only snapshot. Revert to it offline "
2101             "using qemu-img.");
2102         return -EINVAL;
2103     }
2104 
2105     /* Flush all IO requests so they don't interfere with the new state.  */
2106     bdrv_drain_all();
2107 
2108     ret = bdrv_all_goto_snapshot(name, &bs);
2109     if (ret < 0) {
2110         error_report("Error %d while activating snapshot '%s' on '%s'",
2111                      ret, name, bdrv_get_device_name(bs));
2112         return ret;
2113     }
2114 
2115     /* restore the VM state */
2116     f = qemu_fopen_bdrv(bs_vm_state, 0);
2117     if (!f) {
2118         error_report("Could not open VM state file");
2119         return -EINVAL;
2120     }
2121 
2122     qemu_system_reset(VMRESET_SILENT);
2123     migration_incoming_state_new(f);
2124 
2125     aio_context_acquire(aio_context);
2126     ret = qemu_loadvm_state(f);
2127     qemu_fclose(f);
2128     aio_context_release(aio_context);
2129 
2130     migration_incoming_state_destroy();
2131     if (ret < 0) {
2132         error_report("Error %d while loading VM state", ret);
2133         return ret;
2134     }
2135 
2136     return 0;
2137 }
2138 
2139 void hmp_delvm(Monitor *mon, const QDict *qdict)
2140 {
2141     BlockDriverState *bs;
2142     Error *err;
2143     const char *name = qdict_get_str(qdict, "name");
2144 
2145     if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
2146         error_reportf_err(err,
2147                           "Error while deleting snapshot on device '%s': ",
2148                           bdrv_get_device_name(bs));
2149     }
2150 }
2151 
2152 void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
2153 {
2154     BlockDriverState *bs, *bs1;
2155     QEMUSnapshotInfo *sn_tab, *sn;
2156     int nb_sns, i;
2157     int total;
2158     int *available_snapshots;
2159     AioContext *aio_context;
2160 
2161     bs = bdrv_all_find_vmstate_bs();
2162     if (!bs) {
2163         monitor_printf(mon, "No available block device supports snapshots\n");
2164         return;
2165     }
2166     aio_context = bdrv_get_aio_context(bs);
2167 
2168     aio_context_acquire(aio_context);
2169     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2170     aio_context_release(aio_context);
2171 
2172     if (nb_sns < 0) {
2173         monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
2174         return;
2175     }
2176 
2177     if (nb_sns == 0) {
2178         monitor_printf(mon, "There is no snapshot available.\n");
2179         return;
2180     }
2181 
2182     available_snapshots = g_new0(int, nb_sns);
2183     total = 0;
2184     for (i = 0; i < nb_sns; i++) {
2185         if (bdrv_all_find_snapshot(sn_tab[i].id_str, &bs1) == 0) {
2186             available_snapshots[total] = i;
2187             total++;
2188         }
2189     }
2190 
2191     if (total > 0) {
2192         bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
2193         monitor_printf(mon, "\n");
2194         for (i = 0; i < total; i++) {
2195             sn = &sn_tab[available_snapshots[i]];
2196             bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn);
2197             monitor_printf(mon, "\n");
2198         }
2199     } else {
2200         monitor_printf(mon, "There is no suitable snapshot available\n");
2201     }
2202 
2203     g_free(sn_tab);
2204     g_free(available_snapshots);
2205 
2206 }
2207 
2208 void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
2209 {
2210     qemu_ram_set_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK,
2211                        memory_region_name(mr), dev);
2212 }
2213 
2214 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
2215 {
2216     qemu_ram_unset_idstr(memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK);
2217 }
2218 
2219 void vmstate_register_ram_global(MemoryRegion *mr)
2220 {
2221     vmstate_register_ram(mr, NULL);
2222 }
2223