1 /*
2 * Multifd common code
3 *
4 * Copyright (c) 2019-2020 Red Hat Inc
5 *
6 * Authors:
7 * Juan Quintela <quintela@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "qemu/rcu.h"
16 #include "exec/target_page.h"
17 #include "sysemu/sysemu.h"
18 #include "exec/ramblock.h"
19 #include "qemu/error-report.h"
20 #include "qapi/error.h"
21 #include "file.h"
22 #include "migration.h"
23 #include "migration-stats.h"
24 #include "socket.h"
25 #include "tls.h"
26 #include "qemu-file.h"
27 #include "trace.h"
28 #include "multifd.h"
29 #include "threadinfo.h"
30 #include "options.h"
31 #include "qemu/yank.h"
32 #include "io/channel-file.h"
33 #include "io/channel-socket.h"
34 #include "yank_functions.h"
35
36 /* Multiple fd's */
37
/* Magic value checked in the initial message and in every received packet */
#define MULTIFD_MAGIC 0x11223344U
/* Protocol version; the receive side rejects any other value */
#define MULTIFD_VERSION 1

/*
 * Initial (handshake) message of a multifd channel.  It is written to and
 * read from the channel as raw bytes, hence the packed layout; magic and
 * version travel in big endian.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;             /* id of the channel sending the message */
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;
49
/*
 * State shared by the whole multifd send side.  The object is freed, and
 * this pointer reset to NULL, by multifd_send_cleanup_state().
 */
struct {
    /* one MultiFDSendParams per multifd channel */
    MultiFDSendParams *params;
    /*
     * array of pages to send: the batch filled by multifd_queue_page() and
     * handed over to a channel by multifd_send_pages()
     */
    MultiFDPages_t *pages;
    /*
     * Global number of generated multifd packets.
     *
     * Note that we use 'uintptr_t' because it naturally supports atomic
     * operations on both 32-bit and 64-bit hosts.  It means that on 32-bit
     * systems multifd will overflow the packet_num more easily, but that
     * should be fine.
     *
     * Another option is to use QEMU's Stat64, which is 64 bits on all
     * hosts; however, so far it does not support atomic fetch_add().
     * Keep it simple for now.
     */
    uintptr_t packet_num;
    /*
     * Synchronization point past which no more channels will be
     * created.
     */
    QemuSemaphore channels_created;
    /* send channels ready */
    QemuSemaphore channels_ready;
    /*
     * Have we already run terminate threads?  There is a race when we
     * get an error while we are already exiting.
     * Accessed with atomic operations.  Only valid values are 0 and 1.
     */
    int exiting;
    /* multifd ops */
    MultiFDMethods *ops;
} *multifd_send_state;
83
/*
 * State shared by the whole multifd receive side.
 */
struct {
    /*
     * NOTE(review): presumably one entry per receive channel; the users of
     * this array are outside this part of the file - confirm.
     */
    MultiFDRecvParams *params;
    MultiFDRecvData *data;
    /* number of created threads */
    int count;
    /*
     * This is always posted by the recv threads, the migration thread
     * uses it to wait for recv threads to finish assigned tasks.
     */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* read atomically by multifd_recv_should_exit() */
    int exiting;
    /* multifd ops */
    MultiFDMethods *ops;
} *multifd_recv_state;
100
/*
 * Tell whether the multifd packet protocol is in use, which is the case
 * whenever migrate_mapped_ram() reports false.
 */
static bool multifd_use_packets(void)
{
    bool mapped_ram = migrate_mapped_ram();

    return !mapped_ram;
}
105
multifd_send_channel_created(void)106 void multifd_send_channel_created(void)
107 {
108 qemu_sem_post(&multifd_send_state->channels_created);
109 }
110
/*
 * Record the pages queued on channel @p in the file bitmap of their
 * RAMBlock: the first normal_num offsets are set to true, the remaining
 * ones (up to num) are set to false.
 */
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;
    int idx = 0;

    assert(pages->block);

    /* Leading entries: flagged true */
    while (idx < pages->normal_num) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[idx], true);
        idx++;
    }

    /* Everything after them: flagged false */
    while (idx < pages->num) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[idx], false);
        idx++;
    }
}
125
126 /* Multifd without compression */
127
128 /**
129 * nocomp_send_setup: setup send side
130 *
131 * @p: Params for the channel that we are using
132 * @errp: pointer to an error
133 */
nocomp_send_setup(MultiFDSendParams * p,Error ** errp)134 static int nocomp_send_setup(MultiFDSendParams *p, Error **errp)
135 {
136 if (migrate_zero_copy_send()) {
137 p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
138 }
139
140 return 0;
141 }
142
143 /**
144 * nocomp_send_cleanup: cleanup send side
145 *
146 * For no compression this function does nothing.
147 *
148 * @p: Params for the channel that we are using
149 * @errp: pointer to an error
150 */
nocomp_send_cleanup(MultiFDSendParams * p,Error ** errp)151 static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
152 {
153 return;
154 }
155
/*
 * Append one iovec entry per normal page of the batch owned by @p, each
 * one page_size bytes long and pointing into the RAMBlock's host memory,
 * then store the total payload size in p->next_packet_size.
 */
static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
    MultiFDPages_t *batch = p->pages;
    int idx = 0;

    while (idx < batch->normal_num) {
        void *page = batch->block->host + batch->offset[idx];

        p->iov[p->iovs_num].iov_base = page;
        p->iov[p->iovs_num].iov_len = p->page_size;
        p->iovs_num++;
        idx++;
    }

    p->next_packet_size = batch->normal_num * p->page_size;
}
168
169 /**
170 * nocomp_send_prepare: prepare date to be able to send
171 *
172 * For no compression we just have to calculate the size of the
173 * packet.
174 *
175 * Returns 0 for success or -1 for error
176 *
177 * @p: Params for the channel that we are using
178 * @errp: pointer to an error
179 */
nocomp_send_prepare(MultiFDSendParams * p,Error ** errp)180 static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
181 {
182 bool use_zero_copy_send = migrate_zero_copy_send();
183 int ret;
184
185 multifd_send_zero_page_detect(p);
186
187 if (!multifd_use_packets()) {
188 multifd_send_prepare_iovs(p);
189 multifd_set_file_bitmap(p);
190
191 return 0;
192 }
193
194 if (!use_zero_copy_send) {
195 /*
196 * Only !zerocopy needs the header in IOV; zerocopy will
197 * send it separately.
198 */
199 multifd_send_prepare_header(p);
200 }
201
202 multifd_send_prepare_iovs(p);
203 p->flags |= MULTIFD_FLAG_NOCOMP;
204
205 multifd_send_fill_packet(p);
206
207 if (use_zero_copy_send) {
208 /* Send header first, without zerocopy */
209 ret = qio_channel_write_all(p->c, (void *)p->packet,
210 p->packet_len, errp);
211 if (ret != 0) {
212 return -1;
213 }
214 }
215
216 return 0;
217 }
218
219 /**
220 * nocomp_recv_setup: setup receive side
221 *
222 * For no compression this function does nothing.
223 *
224 * Returns 0 for success or -1 for error
225 *
226 * @p: Params for the channel that we are using
227 * @errp: pointer to an error
228 */
nocomp_recv_setup(MultiFDRecvParams * p,Error ** errp)229 static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
230 {
231 return 0;
232 }
233
234 /**
235 * nocomp_recv_cleanup: setup receive side
236 *
237 * For no compression this function does nothing.
238 *
239 * @p: Params for the channel that we are using
240 */
nocomp_recv_cleanup(MultiFDRecvParams * p)241 static void nocomp_recv_cleanup(MultiFDRecvParams *p)
242 {
243 }
244
245 /**
246 * nocomp_recv: read the data from the channel
247 *
248 * For no compression we just need to read things into the correct place.
249 *
250 * Returns 0 for success or -1 for error
251 *
252 * @p: Params for the channel that we are using
253 * @errp: pointer to an error
254 */
nocomp_recv(MultiFDRecvParams * p,Error ** errp)255 static int nocomp_recv(MultiFDRecvParams *p, Error **errp)
256 {
257 uint32_t flags;
258
259 if (!multifd_use_packets()) {
260 return multifd_file_recv_data(p, errp);
261 }
262
263 flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
264
265 if (flags != MULTIFD_FLAG_NOCOMP) {
266 error_setg(errp, "multifd %u: flags received %x flags expected %x",
267 p->id, flags, MULTIFD_FLAG_NOCOMP);
268 return -1;
269 }
270
271 multifd_recv_zero_page_process(p);
272
273 if (!p->normal_num) {
274 return 0;
275 }
276
277 for (int i = 0; i < p->normal_num; i++) {
278 p->iov[i].iov_base = p->host + p->normal[i];
279 p->iov[i].iov_len = p->page_size;
280 ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
281 }
282 return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
283 }
284
/* Method table of the "no compression" multifd method */
static MultiFDMethods multifd_nocomp_ops = {
    .send_setup = nocomp_send_setup,
    .send_cleanup = nocomp_send_cleanup,
    .send_prepare = nocomp_send_prepare,
    .recv_setup = nocomp_recv_setup,
    .recv_cleanup = nocomp_recv_cleanup,
    .recv = nocomp_recv
};
293
/*
 * Method tables indexed by compression method.  Only the no-compression
 * entry is filled in statically; the other slots are populated through
 * multifd_register_ops().
 */
static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {
    [MULTIFD_COMPRESSION_NONE] = &multifd_nocomp_ops,
};
297
/*
 * Install @ops as the method table for compression method @method.
 *
 * @method must be greater than 0 and below MULTIFD_COMPRESSION__MAX, so
 * slot 0 cannot be replaced through this function.
 */
void multifd_register_ops(int method, MultiFDMethods *ops)
{
    MultiFDMethods **slot;

    assert(0 < method && method < MULTIFD_COMPRESSION__MAX);

    slot = &multifd_ops[method];
    *slot = ops;
}
303
304 /* Reset a MultiFDPages_t* object for the next use */
multifd_pages_reset(MultiFDPages_t * pages)305 static void multifd_pages_reset(MultiFDPages_t *pages)
306 {
307 /*
308 * We don't need to touch offset[] array, because it will be
309 * overwritten later when reused.
310 */
311 pages->num = 0;
312 pages->normal_num = 0;
313 pages->block = NULL;
314 }
315
multifd_send_initial_packet(MultiFDSendParams * p,Error ** errp)316 static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
317 {
318 MultiFDInit_t msg = {};
319 size_t size = sizeof(msg);
320 int ret;
321
322 msg.magic = cpu_to_be32(MULTIFD_MAGIC);
323 msg.version = cpu_to_be32(MULTIFD_VERSION);
324 msg.id = p->id;
325 memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
326
327 ret = qio_channel_write_all(p->c, (char *)&msg, size, errp);
328 if (ret != 0) {
329 return -1;
330 }
331 stat64_add(&mig_stats.multifd_bytes, size);
332 return 0;
333 }
334
multifd_recv_initial_packet(QIOChannel * c,Error ** errp)335 static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
336 {
337 MultiFDInit_t msg;
338 int ret;
339
340 ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
341 if (ret != 0) {
342 return -1;
343 }
344
345 msg.magic = be32_to_cpu(msg.magic);
346 msg.version = be32_to_cpu(msg.version);
347
348 if (msg.magic != MULTIFD_MAGIC) {
349 error_setg(errp, "multifd: received packet magic %x "
350 "expected %x", msg.magic, MULTIFD_MAGIC);
351 return -1;
352 }
353
354 if (msg.version != MULTIFD_VERSION) {
355 error_setg(errp, "multifd: received packet version %u "
356 "expected %u", msg.version, MULTIFD_VERSION);
357 return -1;
358 }
359
360 if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
361 char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
362 char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
363
364 error_setg(errp, "multifd: received uuid '%s' and expected "
365 "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
366 g_free(uuid);
367 g_free(msg_uuid);
368 return -1;
369 }
370
371 if (msg.id > migrate_multifd_channels()) {
372 error_setg(errp, "multifd: received channel id %u is greater than "
373 "number of channels %u", msg.id, migrate_multifd_channels());
374 return -1;
375 }
376
377 return msg.id;
378 }
379
multifd_pages_init(uint32_t n)380 static MultiFDPages_t *multifd_pages_init(uint32_t n)
381 {
382 MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
383
384 pages->allocated = n;
385 pages->offset = g_new0(ram_addr_t, n);
386
387 return pages;
388 }
389
/*
 * Release a MultiFDPages_t obtained from multifd_pages_init(), including
 * its offset array.  @pages must not be used afterwards.
 */
static void multifd_pages_clear(MultiFDPages_t *pages)
{
    g_free(pages->offset);
    pages->offset = NULL;
    pages->allocated = 0;
    multifd_pages_reset(pages);

    g_free(pages);
}
398
/*
 * Fill in the header of the packet that channel @p is about to send.
 *
 * Flags, page counts, next_packet_size, a newly taken global packet
 * number and every queued page offset are stored in big endian; the
 * RAMBlock name is copied in when the batch has a block attached.  The
 * per-channel statistics are updated as well.
 */
void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;
    MultiFDPacket_t *hdr = p->packet;
    uint32_t zero_num = pages->num - pages->normal_num;
    uint64_t packet_num;

    hdr->flags = cpu_to_be32(p->flags);
    hdr->pages_alloc = cpu_to_be32(pages->allocated);
    hdr->normal_pages = cpu_to_be32(pages->normal_num);
    hdr->zero_pages = cpu_to_be32(zero_num);
    hdr->next_packet_size = cpu_to_be32(p->next_packet_size);

    /* Packet numbers are global: take the next one atomically */
    packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
    hdr->packet_num = cpu_to_be64(packet_num);

    if (pages->block) {
        strncpy(hdr->ramblock, pages->block->idstr, 256);
    }

    for (int idx = 0; idx < pages->num; idx++) {
        /* widen first: there are architectures where ram_addr_t is 32 bit */
        uint64_t wide = pages->offset[idx];

        hdr->offset[idx] = cpu_to_be64(wide);
    }

    p->total_zero_pages += zero_num;
    p->total_normal_pages += pages->normal_num;
    p->packets_sent++;

    trace_multifd_send(p->id, packet_num, pages->normal_num, zero_num,
                       p->flags, p->next_packet_size);
}
434
multifd_recv_unfill_packet(MultiFDRecvParams * p,Error ** errp)435 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
436 {
437 MultiFDPacket_t *packet = p->packet;
438 int i;
439
440 packet->magic = be32_to_cpu(packet->magic);
441 if (packet->magic != MULTIFD_MAGIC) {
442 error_setg(errp, "multifd: received packet "
443 "magic %x and expected magic %x",
444 packet->magic, MULTIFD_MAGIC);
445 return -1;
446 }
447
448 packet->version = be32_to_cpu(packet->version);
449 if (packet->version != MULTIFD_VERSION) {
450 error_setg(errp, "multifd: received packet "
451 "version %u and expected version %u",
452 packet->version, MULTIFD_VERSION);
453 return -1;
454 }
455
456 p->flags = be32_to_cpu(packet->flags);
457
458 packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
459 /*
460 * If we received a packet that is 100 times bigger than expected
461 * just stop migration. It is a magic number.
462 */
463 if (packet->pages_alloc > p->page_count) {
464 error_setg(errp, "multifd: received packet "
465 "with size %u and expected a size of %u",
466 packet->pages_alloc, p->page_count) ;
467 return -1;
468 }
469
470 p->normal_num = be32_to_cpu(packet->normal_pages);
471 if (p->normal_num > packet->pages_alloc) {
472 error_setg(errp, "multifd: received packet "
473 "with %u normal pages and expected maximum pages are %u",
474 p->normal_num, packet->pages_alloc) ;
475 return -1;
476 }
477
478 p->zero_num = be32_to_cpu(packet->zero_pages);
479 if (p->zero_num > packet->pages_alloc - p->normal_num) {
480 error_setg(errp, "multifd: received packet "
481 "with %u zero pages and expected maximum zero pages are %u",
482 p->zero_num, packet->pages_alloc - p->normal_num) ;
483 return -1;
484 }
485
486 p->next_packet_size = be32_to_cpu(packet->next_packet_size);
487 p->packet_num = be64_to_cpu(packet->packet_num);
488 p->packets_recved++;
489 p->total_normal_pages += p->normal_num;
490 p->total_zero_pages += p->zero_num;
491
492 trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num,
493 p->flags, p->next_packet_size);
494
495 if (p->normal_num == 0 && p->zero_num == 0) {
496 return 0;
497 }
498
499 /* make sure that ramblock is 0 terminated */
500 packet->ramblock[255] = 0;
501 p->block = qemu_ram_block_by_name(packet->ramblock);
502 if (!p->block) {
503 error_setg(errp, "multifd: unknown ram block %s",
504 packet->ramblock);
505 return -1;
506 }
507
508 p->host = p->block->host;
509 for (i = 0; i < p->normal_num; i++) {
510 uint64_t offset = be64_to_cpu(packet->offset[i]);
511
512 if (offset > (p->block->used_length - p->page_size)) {
513 error_setg(errp, "multifd: offset too long %" PRIu64
514 " (max " RAM_ADDR_FMT ")",
515 offset, p->block->used_length);
516 return -1;
517 }
518 p->normal[i] = offset;
519 }
520
521 for (i = 0; i < p->zero_num; i++) {
522 uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
523
524 if (offset > (p->block->used_length - p->page_size)) {
525 error_setg(errp, "multifd: offset too long %" PRIu64
526 " (max " RAM_ADDR_FMT ")",
527 offset, p->block->used_length);
528 return -1;
529 }
530 p->zero[i] = offset;
531 }
532
533 return 0;
534 }
535
multifd_send_should_exit(void)536 static bool multifd_send_should_exit(void)
537 {
538 return qatomic_read(&multifd_send_state->exiting);
539 }
540
multifd_recv_should_exit(void)541 static bool multifd_recv_should_exit(void)
542 {
543 return qatomic_read(&multifd_recv_state->exiting);
544 }
545
546 /*
547 * The migration thread can wait on either of the two semaphores. This
548 * function can be used to kick the main thread out of waiting on either of
549 * them. Should mostly only be called when something wrong happened with
550 * the current multifd send thread.
551 */
multifd_send_kick_main(MultiFDSendParams * p)552 static void multifd_send_kick_main(MultiFDSendParams *p)
553 {
554 qemu_sem_post(&p->sem_sync);
555 qemu_sem_post(&multifd_send_state->channels_ready);
556 }
557
/*
 * How do we use multifd_send_state->pages and channel->pages?
 *
 * We create a pages struct for each channel, and a main one.  Each time
 * that we need to send a batch of pages we swap the one owned by
 * multifd_send_state with the one of the channel that is going to send
 * it.  There are two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the channel must
 * have finished with its own pages before it clears pending_job, and the
 * swap below only happens once pending_job has been observed as false;
 * the acquire/release pairs documented in the body order these accesses.
 *
 * Returns true on success, false otherwise.
 */
static bool multifd_send_pages(void)
{
    int i;
    /* channel to start the search from; kept across calls (round robin) */
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make happy gcc */
    MultiFDPages_t *pages = multifd_send_state->pages;

    if (multifd_send_should_exit()) {
        return false;
    }

    /* We wait here, until at least one channel is ready */
    qemu_sem_wait(&multifd_send_state->channels_ready);

    /*
     * next_channel can remain from a previous migration that was
     * using more channels, so ensure it doesn't overflow if the
     * limit is lower now.
     */
    next_channel %= migrate_multifd_channels();
    /* Loop over the channels until an idle one is found or we must exit */
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        if (multifd_send_should_exit()) {
            return false;
        }
        p = &multifd_send_state->params[i];
        /*
         * Lockless read to p->pending_job is safe, because only multifd
         * sender thread can clear it.
         */
        if (qatomic_read(&p->pending_job) == false) {
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
    }

    /*
     * Make sure we read p->pending_job before all the rest.  Pairs with
     * qatomic_store_release() in multifd_send_thread().
     */
    smp_mb_acquire();
    assert(!p->pages->num);
    /* Swap: the channel gets the filled batch, we get its empty one */
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    /*
     * Making sure p->pages is setup before marking pending_job=true. Pairs
     * with the qatomic_load_acquire() in multifd_send_thread().
     */
    qatomic_store_release(&p->pending_job, true);
    qemu_sem_post(&p->sem);

    return true;
}
629
/* Tell whether no page at all has been queued in @pages */
static inline bool multifd_queue_empty(MultiFDPages_t *pages)
{
    return !pages->num;
}
634
/* Tell whether @pages holds as many offsets as were allocated for it */
static inline bool multifd_queue_full(MultiFDPages_t *pages)
{
    return pages->allocated == pages->num;
}
639
/*
 * Append @offset at the end of @pages.  No bounds check is done here; the
 * caller is expected to have checked multifd_queue_full() first.
 */
static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
{
    pages->offset[pages->num] = offset;
    pages->num++;
}
644
645 /* Returns true if enqueue successful, false otherwise */
multifd_queue_page(RAMBlock * block,ram_addr_t offset)646 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
647 {
648 MultiFDPages_t *pages;
649
650 retry:
651 pages = multifd_send_state->pages;
652
653 /* If the queue is empty, we can already enqueue now */
654 if (multifd_queue_empty(pages)) {
655 pages->block = block;
656 multifd_enqueue(pages, offset);
657 return true;
658 }
659
660 /*
661 * Not empty, meanwhile we need a flush. It can because of either:
662 *
663 * (1) The page is not on the same ramblock of previous ones, or,
664 * (2) The queue is full.
665 *
666 * After flush, always retry.
667 */
668 if (pages->block != block || multifd_queue_full(pages)) {
669 if (!multifd_send_pages()) {
670 return false;
671 }
672 goto retry;
673 }
674
675 /* Not empty, and we still have space, do it! */
676 multifd_enqueue(pages, offset);
677 return true;
678 }
679
680 /* Multifd send side hit an error; remember it and prepare to quit */
multifd_send_set_error(Error * err)681 static void multifd_send_set_error(Error *err)
682 {
683 /*
684 * We don't want to exit each threads twice. Depending on where
685 * we get the error, or if there are two independent errors in two
686 * threads at the same time, we can end calling this function
687 * twice.
688 */
689 if (qatomic_xchg(&multifd_send_state->exiting, 1)) {
690 return;
691 }
692
693 if (err) {
694 MigrationState *s = migrate_get_current();
695 migrate_set_error(s, err);
696 if (s->state == MIGRATION_STATUS_SETUP ||
697 s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
698 s->state == MIGRATION_STATUS_DEVICE ||
699 s->state == MIGRATION_STATUS_ACTIVE) {
700 migrate_set_state(&s->state, s->state,
701 MIGRATION_STATUS_FAILED);
702 }
703 }
704 }
705
multifd_send_terminate_threads(void)706 static void multifd_send_terminate_threads(void)
707 {
708 int i;
709
710 trace_multifd_send_terminate_threads();
711
712 /*
713 * Tell everyone we're quitting. No xchg() needed here; we simply
714 * always set it.
715 */
716 qatomic_set(&multifd_send_state->exiting, 1);
717
718 /*
719 * Firstly, kick all threads out; no matter whether they are just idle,
720 * or blocked in an IO system call.
721 */
722 for (i = 0; i < migrate_multifd_channels(); i++) {
723 MultiFDSendParams *p = &multifd_send_state->params[i];
724
725 qemu_sem_post(&p->sem);
726 if (p->c) {
727 qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
728 }
729 }
730
731 /*
732 * Finally recycle all the threads.
733 */
734 for (i = 0; i < migrate_multifd_channels(); i++) {
735 MultiFDSendParams *p = &multifd_send_state->params[i];
736
737 if (p->tls_thread_created) {
738 qemu_thread_join(&p->tls_thread);
739 }
740
741 if (p->thread_created) {
742 qemu_thread_join(&p->thread);
743 }
744 }
745 }
746
multifd_send_cleanup_channel(MultiFDSendParams * p,Error ** errp)747 static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
748 {
749 if (p->c) {
750 migration_ioc_unregister_yank(p->c);
751 /*
752 * The object_unref() cannot guarantee the fd will always be
753 * released because finalize() of the iochannel is only
754 * triggered on the last reference and it's not guaranteed
755 * that we always hold the last refcount when reaching here.
756 *
757 * Closing the fd explicitly has the benefit that if there is any
758 * registered I/O handler callbacks on such fd, that will get a
759 * POLLNVAL event and will further trigger the cleanup to finally
760 * release the IOC.
761 *
762 * FIXME: It should logically be guaranteed that all multifd
763 * channels have no I/O handler callback registered when reaching
764 * here, because migration thread will wait for all multifd channel
765 * establishments to complete during setup. Since
766 * migrate_fd_cleanup() will be scheduled in main thread too, all
767 * previous callbacks should guarantee to be completed when
768 * reaching here. See multifd_send_state.channels_created and its
769 * usage. In the future, we could replace this with an assert
770 * making sure we're the last reference, or simply drop it if above
771 * is more clear to be justified.
772 */
773 qio_channel_close(p->c, &error_abort);
774 object_unref(OBJECT(p->c));
775 p->c = NULL;
776 }
777 qemu_sem_destroy(&p->sem);
778 qemu_sem_destroy(&p->sem_sync);
779 g_free(p->name);
780 p->name = NULL;
781 multifd_pages_clear(p->pages);
782 p->pages = NULL;
783 p->packet_len = 0;
784 g_free(p->packet);
785 p->packet = NULL;
786 g_free(p->iov);
787 p->iov = NULL;
788 multifd_send_state->ops->send_cleanup(p, errp);
789
790 return *errp == NULL;
791 }
792
multifd_send_cleanup_state(void)793 static void multifd_send_cleanup_state(void)
794 {
795 file_cleanup_outgoing_migration();
796 socket_cleanup_outgoing_migration();
797 qemu_sem_destroy(&multifd_send_state->channels_created);
798 qemu_sem_destroy(&multifd_send_state->channels_ready);
799 g_free(multifd_send_state->params);
800 multifd_send_state->params = NULL;
801 multifd_pages_clear(multifd_send_state->pages);
802 multifd_send_state->pages = NULL;
803 g_free(multifd_send_state);
804 multifd_send_state = NULL;
805 }
806
multifd_send_shutdown(void)807 void multifd_send_shutdown(void)
808 {
809 int i;
810
811 if (!migrate_multifd()) {
812 return;
813 }
814
815 multifd_send_terminate_threads();
816
817 for (i = 0; i < migrate_multifd_channels(); i++) {
818 MultiFDSendParams *p = &multifd_send_state->params[i];
819 Error *local_err = NULL;
820
821 if (!multifd_send_cleanup_channel(p, &local_err)) {
822 migrate_set_error(migrate_get_current(), local_err);
823 error_free(local_err);
824 }
825 }
826
827 multifd_send_cleanup_state();
828 }
829
/*
 * Flush channel @c with qio_channel_flush().
 *
 * A flush result of 1 is counted in the dirty_sync_missed_zero_copy
 * statistic.  On failure the error is reported.
 *
 * Returns the value returned by qio_channel_flush() when it is not
 * negative, -1 otherwise.
 */
static int multifd_zero_copy_flush(QIOChannel *c)
{
    Error *err = NULL;
    int ret = qio_channel_flush(c, &err);

    if (ret < 0) {
        error_report_err(err);
        return -1;
    }

    if (ret == 1) {
        stat64_add(&mig_stats.dirty_sync_missed_zero_copy, 1);
    }

    return ret;
}
846
/*
 * Synchronize the migration thread with all multifd send channels.
 *
 * Any pages still queued in the main batch are handed to a channel first.
 * Every channel is then asked to perform a sync (pending_sync), and the
 * function waits for each of them to signal completion through its
 * sem_sync.  With zero copy send enabled, each channel is also flushed.
 *
 * Returns 0 on success, or when multifd is not enabled; -1 on failure or
 * when the send side is exiting.
 */
int multifd_send_sync_main(void)
{
    int i;
    bool flush_zero_copy;

    if (!migrate_multifd()) {
        return 0;
    }
    /* Hand over a partially filled batch before syncing */
    if (multifd_send_state->pages->num) {
        if (!multifd_send_pages()) {
            error_report("%s: multifd_send_pages fail", __func__);
            return -1;
        }
    }

    flush_zero_copy = migrate_zero_copy_send();

    /* First pass: request a sync from every channel */
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (multifd_send_should_exit()) {
            return -1;
        }

        trace_multifd_send_sync_main_signal(p->id);

        /*
         * We should be the only user so far, so not possible to be set by
         * others concurrently.
         */
        assert(qatomic_read(&p->pending_sync) == false);
        qatomic_set(&p->pending_sync, true);
        qemu_sem_post(&p->sem);
    }
    /* Second pass: wait for every channel to complete its sync */
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (multifd_send_should_exit()) {
            return -1;
        }

        qemu_sem_wait(&multifd_send_state->channels_ready);
        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&p->sem_sync);

        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
            return -1;
        }
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);

    return 0;
}
900
/*
 * Body of the send thread of one multifd channel.
 *
 * When packets are in use the initial packet is sent first.  The thread
 * then loops: it announces itself on channels_ready, waits on p->sem and
 * serves either a pages job (pending_job) or a sync request
 * (pending_sync), until the send side is exiting or an error occurs.  On
 * error the error is recorded and the migration thread is kicked.
 *
 * @opaque: the MultiFDSendParams of the channel
 *
 * Returns NULL
 */
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    MigrationThread *thread = NULL;
    Error *local_err = NULL;
    int ret = 0;
    bool use_packets = multifd_use_packets();

    thread = migration_threads_add(p->name, qemu_get_thread_id());

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (use_packets) {
        if (multifd_send_initial_packet(p, &local_err) < 0) {
            ret = -1;
            goto out;
        }
    }

    while (true) {
        /* Tell the migration thread we are idle, then wait for work */
        qemu_sem_post(&multifd_send_state->channels_ready);
        qemu_sem_wait(&p->sem);

        if (multifd_send_should_exit()) {
            break;
        }

        /*
         * Read pending_job flag before p->pages.  Pairs with the
         * qatomic_store_release() in multifd_send_pages().
         */
        if (qatomic_load_acquire(&p->pending_job)) {
            MultiFDPages_t *pages = p->pages;

            p->iovs_num = 0;
            assert(pages->num);

            ret = multifd_send_state->ops->send_prepare(p, &local_err);
            if (ret != 0) {
                break;
            }

            if (migrate_mapped_ram()) {
                ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num,
                                              p->pages->block, &local_err);
            } else {
                ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num,
                                                  NULL, 0, p->write_flags,
                                                  &local_err);
            }

            if (ret != 0) {
                break;
            }

            stat64_add(&mig_stats.multifd_bytes,
                       p->next_packet_size + p->packet_len);
            stat64_add(&mig_stats.normal_pages, pages->normal_num);
            stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num);

            /* The batch has been sent: empty it for the next swap */
            multifd_pages_reset(p->pages);
            p->next_packet_size = 0;

            /*
             * Making sure p->pages is published before saying "we're
             * free".  Pairs with the smp_mb_acquire() in
             * multifd_send_pages().
             */
            qatomic_store_release(&p->pending_job, false);
        } else {
            /*
             * If not a normal job, must be a sync request.  Note that
             * pending_sync is a standalone flag (unlike pending_job), so
             * it doesn't require explicit memory barriers.
             */
            assert(qatomic_read(&p->pending_sync));

            if (use_packets) {
                p->flags = MULTIFD_FLAG_SYNC;
                multifd_send_fill_packet(p);
                ret = qio_channel_write_all(p->c, (void *)p->packet,
                                            p->packet_len, &local_err);
                if (ret != 0) {
                    break;
                }
                /* p->next_packet_size will always be zero for a SYNC packet */
                stat64_add(&mig_stats.multifd_bytes, p->packet_len);
                p->flags = 0;
            }

            /* Sync done: let multifd_send_sync_main() proceed */
            qatomic_set(&p->pending_sync, false);
            qemu_sem_post(&p->sem_sync);
        }
    }

out:
    if (ret) {
        /* Record the error and wake up a possibly waiting main thread */
        assert(local_err);
        trace_multifd_send_error(p->id);
        multifd_send_set_error(local_err);
        multifd_send_kick_main(p);
        error_free(local_err);
    }

    rcu_unregister_thread();
    migration_threads_remove(thread);
    trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages,
                                  p->total_zero_pages);

    return NULL;
}
1013
/* Declared early: the TLS handshake thread passes it on as a callback */
static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque);

/*
 * Arguments of multifd_tls_handshake_thread().  The structure is
 * allocated by multifd_tls_channel_connect() and freed by the thread.
 */
typedef struct {
    MultiFDSendParams *p;   /* channel the handshake is performed for */
    QIOChannelTLS *tioc;    /* TLS channel to run the handshake on */
} MultiFDTLSThreadArgs;
1020
multifd_tls_handshake_thread(void * opaque)1021 static void *multifd_tls_handshake_thread(void *opaque)
1022 {
1023 MultiFDTLSThreadArgs *args = opaque;
1024
1025 qio_channel_tls_handshake(args->tioc,
1026 multifd_new_send_channel_async,
1027 args->p,
1028 NULL,
1029 NULL);
1030 g_free(args);
1031
1032 return NULL;
1033 }
1034
multifd_tls_channel_connect(MultiFDSendParams * p,QIOChannel * ioc,Error ** errp)1035 static bool multifd_tls_channel_connect(MultiFDSendParams *p,
1036 QIOChannel *ioc,
1037 Error **errp)
1038 {
1039 MigrationState *s = migrate_get_current();
1040 const char *hostname = s->hostname;
1041 MultiFDTLSThreadArgs *args;
1042 QIOChannelTLS *tioc;
1043
1044 tioc = migration_tls_client_create(ioc, hostname, errp);
1045 if (!tioc) {
1046 return false;
1047 }
1048
1049 /*
1050 * Ownership of the socket channel now transfers to the newly
1051 * created TLS channel, which has already taken a reference.
1052 */
1053 object_unref(OBJECT(ioc));
1054 trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
1055 qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
1056
1057 args = g_new0(MultiFDTLSThreadArgs, 1);
1058 args->tioc = tioc;
1059 args->p = p;
1060
1061 p->tls_thread_created = true;
1062 qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker",
1063 multifd_tls_handshake_thread, args,
1064 QEMU_THREAD_JOINABLE);
1065 return true;
1066 }
1067
/*
 * Attach the fully established I/O channel @ioc to send channel @p and
 * start the send thread of the channel.
 *
 * Delay is turned off on @ioc and the channel is registered for yank
 * before it is stored in p->c.
 */
void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc)
{
    const char *thread_name = p->name;

    qio_channel_set_delay(ioc, false);
    migration_ioc_register_yank(ioc);

    /* p->c is only set once the channel is completely set up */
    p->c = ioc;

    /* Flag first, so cleanup knows there is a thread to join */
    p->thread_created = true;
    qemu_thread_create(&p->thread, thread_name, multifd_send_thread, p,
                       QEMU_THREAD_JOINABLE);
}
1080
/*
 * Completion callback of the creation of a multifd send channel.
 *
 * When TLS is enabled this function is called once to establish the
 * TLS connection and a second time after the TLS handshake to create
 * the multifd channel.  Without TLS it goes straight into the channel
 * creation.
 *
 * @task: the task that completed; its source object is the I/O channel
 * @opaque: the MultiFDSendParams of the channel being created
 */
static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;
    bool ret;

    trace_multifd_new_send_channel_async(p->id);

    if (qio_task_propagate_error(task, &local_err)) {
        ret = false;
        goto out;
    }

    trace_multifd_set_outgoing_channel(ioc, object_get_typename(OBJECT(ioc)),
                                       migrate_get_current()->hostname);

    if (migrate_channel_requires_tls_upgrade(ioc)) {
        ret = multifd_tls_channel_connect(p, ioc, &local_err);
        if (ret) {
            /*
             * Handshake started: this function runs again once it
             * completes, the channel is not reported as created yet.
             */
            return;
        }
    } else {
        multifd_channel_connect(p, ioc);
        ret = true;
    }

out:
    /*
     * Here we're not interested whether creation succeeded, only that
     * it happened at all.
     */
    multifd_send_channel_created();

    if (ret) {
        return;
    }

    trace_multifd_new_send_channel_async_error(p->id, local_err);
    multifd_send_set_error(local_err);
    /*
     * For error cases (TLS or non-TLS), IO channel is always freed here
     * rather than when cleanup multifd: since p->c is not set, multifd
     * cleanup code doesn't even know its existence.
     */
    object_unref(OBJECT(ioc));
    error_free(local_err);
}
1135
multifd_new_send_channel_create(gpointer opaque,Error ** errp)1136 static bool multifd_new_send_channel_create(gpointer opaque, Error **errp)
1137 {
1138 if (!multifd_use_packets()) {
1139 return file_send_channel_create(opaque, errp);
1140 }
1141
1142 socket_send_channel_create(multifd_new_send_channel_async, opaque);
1143 return true;
1144 }
1145
multifd_send_setup(void)1146 bool multifd_send_setup(void)
1147 {
1148 MigrationState *s = migrate_get_current();
1149 Error *local_err = NULL;
1150 int thread_count, ret = 0;
1151 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
1152 bool use_packets = multifd_use_packets();
1153 uint8_t i;
1154
1155 if (!migrate_multifd()) {
1156 return true;
1157 }
1158
1159 thread_count = migrate_multifd_channels();
1160 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
1161 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
1162 multifd_send_state->pages = multifd_pages_init(page_count);
1163 qemu_sem_init(&multifd_send_state->channels_created, 0);
1164 qemu_sem_init(&multifd_send_state->channels_ready, 0);
1165 qatomic_set(&multifd_send_state->exiting, 0);
1166 multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];
1167
1168 for (i = 0; i < thread_count; i++) {
1169 MultiFDSendParams *p = &multifd_send_state->params[i];
1170
1171 qemu_sem_init(&p->sem, 0);
1172 qemu_sem_init(&p->sem_sync, 0);
1173 p->id = i;
1174 p->pages = multifd_pages_init(page_count);
1175
1176 if (use_packets) {
1177 p->packet_len = sizeof(MultiFDPacket_t)
1178 + sizeof(uint64_t) * page_count;
1179 p->packet = g_malloc0(p->packet_len);
1180 p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
1181 p->packet->version = cpu_to_be32(MULTIFD_VERSION);
1182
1183 /* We need one extra place for the packet header */
1184 p->iov = g_new0(struct iovec, page_count + 1);
1185 } else {
1186 p->iov = g_new0(struct iovec, page_count);
1187 }
1188 p->name = g_strdup_printf("multifdsend_%d", i);
1189 p->page_size = qemu_target_page_size();
1190 p->page_count = page_count;
1191 p->write_flags = 0;
1192
1193 if (!multifd_new_send_channel_create(p, &local_err)) {
1194 return false;
1195 }
1196 }
1197
1198 /*
1199 * Wait until channel creation has started for all channels. The
1200 * creation can still fail, but no more channels will be created
1201 * past this point.
1202 */
1203 for (i = 0; i < thread_count; i++) {
1204 qemu_sem_wait(&multifd_send_state->channels_created);
1205 }
1206
1207 for (i = 0; i < thread_count; i++) {
1208 MultiFDSendParams *p = &multifd_send_state->params[i];
1209
1210 ret = multifd_send_state->ops->send_setup(p, &local_err);
1211 if (ret) {
1212 break;
1213 }
1214 }
1215
1216 if (ret) {
1217 migrate_set_error(s, local_err);
1218 error_report_err(local_err);
1219 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1220 MIGRATION_STATUS_FAILED);
1221 return false;
1222 }
1223
1224 return true;
1225 }
1226
multifd_recv(void)1227 bool multifd_recv(void)
1228 {
1229 int i;
1230 static int next_recv_channel;
1231 MultiFDRecvParams *p = NULL;
1232 MultiFDRecvData *data = multifd_recv_state->data;
1233
1234 /*
1235 * next_channel can remain from a previous migration that was
1236 * using more channels, so ensure it doesn't overflow if the
1237 * limit is lower now.
1238 */
1239 next_recv_channel %= migrate_multifd_channels();
1240 for (i = next_recv_channel;; i = (i + 1) % migrate_multifd_channels()) {
1241 if (multifd_recv_should_exit()) {
1242 return false;
1243 }
1244
1245 p = &multifd_recv_state->params[i];
1246
1247 if (qatomic_read(&p->pending_job) == false) {
1248 next_recv_channel = (i + 1) % migrate_multifd_channels();
1249 break;
1250 }
1251 }
1252
1253 /*
1254 * Order pending_job read before manipulating p->data below. Pairs
1255 * with qatomic_store_release() at multifd_recv_thread().
1256 */
1257 smp_mb_acquire();
1258
1259 assert(!p->data->size);
1260 multifd_recv_state->data = p->data;
1261 p->data = data;
1262
1263 /*
1264 * Order p->data update before setting pending_job. Pairs with
1265 * qatomic_load_acquire() at multifd_recv_thread().
1266 */
1267 qatomic_store_release(&p->pending_job, true);
1268 qemu_sem_post(&p->sem);
1269
1270 return true;
1271 }
1272
multifd_get_recv_data(void)1273 MultiFDRecvData *multifd_get_recv_data(void)
1274 {
1275 return multifd_recv_state->data;
1276 }
1277
multifd_recv_terminate_threads(Error * err)1278 static void multifd_recv_terminate_threads(Error *err)
1279 {
1280 int i;
1281
1282 trace_multifd_recv_terminate_threads(err != NULL);
1283
1284 if (qatomic_xchg(&multifd_recv_state->exiting, 1)) {
1285 return;
1286 }
1287
1288 if (err) {
1289 MigrationState *s = migrate_get_current();
1290 migrate_set_error(s, err);
1291 if (s->state == MIGRATION_STATUS_SETUP ||
1292 s->state == MIGRATION_STATUS_ACTIVE) {
1293 migrate_set_state(&s->state, s->state,
1294 MIGRATION_STATUS_FAILED);
1295 }
1296 }
1297
1298 for (i = 0; i < migrate_multifd_channels(); i++) {
1299 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1300
1301 /*
1302 * The migration thread and channels interact differently
1303 * depending on the presence of packets.
1304 */
1305 if (multifd_use_packets()) {
1306 /*
1307 * The channel receives as long as there are packets. When
1308 * packets end (i.e. MULTIFD_FLAG_SYNC is reached), the
1309 * channel waits for the migration thread to sync. If the
1310 * sync never happens, do it here.
1311 */
1312 qemu_sem_post(&p->sem_sync);
1313 } else {
1314 /*
1315 * The channel waits for the migration thread to give it
1316 * work. When the migration thread runs out of work, it
1317 * releases the channel and waits for any pending work to
1318 * finish. If we reach here (e.g. due to error) before the
1319 * work runs out, release the channel.
1320 */
1321 qemu_sem_post(&p->sem);
1322 }
1323
1324 /*
1325 * We could arrive here for two reasons:
1326 * - normal quit, i.e. everything went fine, just finished
1327 * - error quit: We close the channels so the channel threads
1328 * finish the qio_channel_read_all_eof()
1329 */
1330 if (p->c) {
1331 qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
1332 }
1333 }
1334 }
1335
/*
 * Request termination of the multifd receive threads, without an
 * error.  Does nothing when multifd is not enabled.
 */
void multifd_recv_shutdown(void)
{
    if (!migrate_multifd()) {
        return;
    }

    multifd_recv_terminate_threads(NULL);
}
1342
/*
 * Release everything owned by receive channel @p: the IO channel
 * reference and its yank registration, the mutex and semaphores, and
 * all per-channel heap allocations.  The recv_cleanup() hook of the
 * active multifd ops runs last.
 */
static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
{
    migration_ioc_unregister_yank(p->c);
    object_unref(OBJECT(p->c));
    p->c = NULL;
    qemu_mutex_destroy(&p->mutex);
    qemu_sem_destroy(&p->sem_sync);
    qemu_sem_destroy(&p->sem);
    /*
     * multifd_recv_setup() allocates one MultiFDRecvData per channel.
     * multifd_recv() may swap it with the global buffer, but every
     * channel always owns exactly one, which must be freed here.
     */
    g_free(p->data);
    p->data = NULL;
    g_free(p->name);
    p->name = NULL;
    p->packet_len = 0;
    g_free(p->packet);
    p->packet = NULL;
    g_free(p->iov);
    p->iov = NULL;
    g_free(p->normal);
    p->normal = NULL;
    g_free(p->zero);
    p->zero = NULL;
    multifd_recv_state->ops->recv_cleanup(p);
}
1364
multifd_recv_cleanup_state(void)1365 static void multifd_recv_cleanup_state(void)
1366 {
1367 qemu_sem_destroy(&multifd_recv_state->sem_sync);
1368 g_free(multifd_recv_state->params);
1369 multifd_recv_state->params = NULL;
1370 g_free(multifd_recv_state->data);
1371 multifd_recv_state->data = NULL;
1372 g_free(multifd_recv_state);
1373 multifd_recv_state = NULL;
1374 }
1375
multifd_recv_cleanup(void)1376 void multifd_recv_cleanup(void)
1377 {
1378 int i;
1379
1380 if (!migrate_multifd()) {
1381 return;
1382 }
1383 multifd_recv_terminate_threads(NULL);
1384 for (i = 0; i < migrate_multifd_channels(); i++) {
1385 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1386
1387 if (p->thread_created) {
1388 qemu_thread_join(&p->thread);
1389 }
1390 }
1391 for (i = 0; i < migrate_multifd_channels(); i++) {
1392 multifd_recv_cleanup_channel(&multifd_recv_state->params[i]);
1393 }
1394 multifd_recv_cleanup_state();
1395 }
1396
multifd_recv_sync_main(void)1397 void multifd_recv_sync_main(void)
1398 {
1399 int thread_count = migrate_multifd_channels();
1400 bool file_based = !multifd_use_packets();
1401 int i;
1402
1403 if (!migrate_multifd()) {
1404 return;
1405 }
1406
1407 /*
1408 * File-based channels don't use packets and therefore need to
1409 * wait for more work. Release them to start the sync.
1410 */
1411 if (file_based) {
1412 for (i = 0; i < thread_count; i++) {
1413 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1414
1415 trace_multifd_recv_sync_main_signal(p->id);
1416 qemu_sem_post(&p->sem);
1417 }
1418 }
1419
1420 /*
1421 * Initiate the synchronization by waiting for all channels.
1422 *
1423 * For socket-based migration this means each channel has received
1424 * the SYNC packet on the stream.
1425 *
1426 * For file-based migration this means each channel is done with
1427 * the work (pending_job=false).
1428 */
1429 for (i = 0; i < thread_count; i++) {
1430 trace_multifd_recv_sync_main_wait(i);
1431 qemu_sem_wait(&multifd_recv_state->sem_sync);
1432 }
1433
1434 if (file_based) {
1435 /*
1436 * For file-based loading is done in one iteration. We're
1437 * done.
1438 */
1439 return;
1440 }
1441
1442 /*
1443 * Sync done. Release the channels for the next iteration.
1444 */
1445 for (i = 0; i < thread_count; i++) {
1446 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1447
1448 WITH_QEMU_LOCK_GUARD(&p->mutex) {
1449 if (multifd_recv_state->packet_num < p->packet_num) {
1450 multifd_recv_state->packet_num = p->packet_num;
1451 }
1452 }
1453 trace_multifd_recv_sync_main_signal(p->id);
1454 qemu_sem_post(&p->sem_sync);
1455 }
1456 trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
1457 }
1458
multifd_recv_thread(void * opaque)1459 static void *multifd_recv_thread(void *opaque)
1460 {
1461 MultiFDRecvParams *p = opaque;
1462 Error *local_err = NULL;
1463 bool use_packets = multifd_use_packets();
1464 int ret;
1465
1466 trace_multifd_recv_thread_start(p->id);
1467 rcu_register_thread();
1468
1469 while (true) {
1470 uint32_t flags = 0;
1471 bool has_data = false;
1472 p->normal_num = 0;
1473
1474 if (use_packets) {
1475 if (multifd_recv_should_exit()) {
1476 break;
1477 }
1478
1479 ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
1480 p->packet_len, &local_err);
1481 if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */
1482 break;
1483 }
1484
1485 qemu_mutex_lock(&p->mutex);
1486 ret = multifd_recv_unfill_packet(p, &local_err);
1487 if (ret) {
1488 qemu_mutex_unlock(&p->mutex);
1489 break;
1490 }
1491
1492 flags = p->flags;
1493 /* recv methods don't know how to handle the SYNC flag */
1494 p->flags &= ~MULTIFD_FLAG_SYNC;
1495 has_data = p->normal_num || p->zero_num;
1496 qemu_mutex_unlock(&p->mutex);
1497 } else {
1498 /*
1499 * No packets, so we need to wait for the vmstate code to
1500 * give us work.
1501 */
1502 qemu_sem_wait(&p->sem);
1503
1504 if (multifd_recv_should_exit()) {
1505 break;
1506 }
1507
1508 /* pairs with qatomic_store_release() at multifd_recv() */
1509 if (!qatomic_load_acquire(&p->pending_job)) {
1510 /*
1511 * Migration thread did not send work, this is
1512 * equivalent to pending_sync on the sending
1513 * side. Post sem_sync to notify we reached this
1514 * point.
1515 */
1516 qemu_sem_post(&multifd_recv_state->sem_sync);
1517 continue;
1518 }
1519
1520 has_data = !!p->data->size;
1521 }
1522
1523 if (has_data) {
1524 ret = multifd_recv_state->ops->recv(p, &local_err);
1525 if (ret != 0) {
1526 break;
1527 }
1528 }
1529
1530 if (use_packets) {
1531 if (flags & MULTIFD_FLAG_SYNC) {
1532 qemu_sem_post(&multifd_recv_state->sem_sync);
1533 qemu_sem_wait(&p->sem_sync);
1534 }
1535 } else {
1536 p->total_normal_pages += p->data->size / qemu_target_page_size();
1537 p->data->size = 0;
1538 /*
1539 * Order data->size update before clearing
1540 * pending_job. Pairs with smp_mb_acquire() at
1541 * multifd_recv().
1542 */
1543 qatomic_store_release(&p->pending_job, false);
1544 }
1545 }
1546
1547 if (local_err) {
1548 multifd_recv_terminate_threads(local_err);
1549 error_free(local_err);
1550 }
1551
1552 rcu_unregister_thread();
1553 trace_multifd_recv_thread_end(p->id, p->packets_recved,
1554 p->total_normal_pages,
1555 p->total_zero_pages);
1556
1557 return NULL;
1558 }
1559
multifd_recv_setup(Error ** errp)1560 int multifd_recv_setup(Error **errp)
1561 {
1562 int thread_count;
1563 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
1564 bool use_packets = multifd_use_packets();
1565 uint8_t i;
1566
1567 /*
1568 * Return successfully if multiFD recv state is already initialised
1569 * or multiFD is not enabled.
1570 */
1571 if (multifd_recv_state || !migrate_multifd()) {
1572 return 0;
1573 }
1574
1575 thread_count = migrate_multifd_channels();
1576 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1577 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
1578
1579 multifd_recv_state->data = g_new0(MultiFDRecvData, 1);
1580 multifd_recv_state->data->size = 0;
1581
1582 qatomic_set(&multifd_recv_state->count, 0);
1583 qatomic_set(&multifd_recv_state->exiting, 0);
1584 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
1585 multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
1586
1587 for (i = 0; i < thread_count; i++) {
1588 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1589
1590 qemu_mutex_init(&p->mutex);
1591 qemu_sem_init(&p->sem_sync, 0);
1592 qemu_sem_init(&p->sem, 0);
1593 p->pending_job = false;
1594 p->id = i;
1595
1596 p->data = g_new0(MultiFDRecvData, 1);
1597 p->data->size = 0;
1598
1599 if (use_packets) {
1600 p->packet_len = sizeof(MultiFDPacket_t)
1601 + sizeof(uint64_t) * page_count;
1602 p->packet = g_malloc0(p->packet_len);
1603 }
1604 p->name = g_strdup_printf("multifdrecv_%d", i);
1605 p->iov = g_new0(struct iovec, page_count);
1606 p->normal = g_new0(ram_addr_t, page_count);
1607 p->zero = g_new0(ram_addr_t, page_count);
1608 p->page_count = page_count;
1609 p->page_size = qemu_target_page_size();
1610 }
1611
1612 for (i = 0; i < thread_count; i++) {
1613 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1614 int ret;
1615
1616 ret = multifd_recv_state->ops->recv_setup(p, errp);
1617 if (ret) {
1618 return ret;
1619 }
1620 }
1621 return 0;
1622 }
1623
multifd_recv_all_channels_created(void)1624 bool multifd_recv_all_channels_created(void)
1625 {
1626 int thread_count = migrate_multifd_channels();
1627
1628 if (!migrate_multifd()) {
1629 return true;
1630 }
1631
1632 if (!multifd_recv_state) {
1633 /* Called before any connections created */
1634 return false;
1635 }
1636
1637 return thread_count == qatomic_read(&multifd_recv_state->count);
1638 }
1639
1640 /*
1641 * Try to receive all multifd channels to get ready for the migration.
1642 * Sets @errp when failing to receive the current channel.
1643 */
multifd_recv_new_channel(QIOChannel * ioc,Error ** errp)1644 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
1645 {
1646 MultiFDRecvParams *p;
1647 Error *local_err = NULL;
1648 bool use_packets = multifd_use_packets();
1649 int id;
1650
1651 if (use_packets) {
1652 id = multifd_recv_initial_packet(ioc, &local_err);
1653 if (id < 0) {
1654 multifd_recv_terminate_threads(local_err);
1655 error_propagate_prepend(errp, local_err,
1656 "failed to receive packet"
1657 " via multifd channel %d: ",
1658 qatomic_read(&multifd_recv_state->count));
1659 return;
1660 }
1661 trace_multifd_recv_new_channel(id);
1662 } else {
1663 id = qatomic_read(&multifd_recv_state->count);
1664 }
1665
1666 p = &multifd_recv_state->params[id];
1667 if (p->c != NULL) {
1668 error_setg(&local_err, "multifd: received id '%d' already setup'",
1669 id);
1670 multifd_recv_terminate_threads(local_err);
1671 error_propagate(errp, local_err);
1672 return;
1673 }
1674 p->c = ioc;
1675 object_ref(OBJECT(ioc));
1676
1677 p->thread_created = true;
1678 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1679 QEMU_THREAD_JOINABLE);
1680 qatomic_inc(&multifd_recv_state->count);
1681 }
1682
/*
 * Preparation steps shared by the multifd send methods.
 *
 * Zero page detection is run on @p first.  If normal pages remain, the
 * header is prepared and true is returned.  Otherwise
 * p->next_packet_size is set to 0 and false is returned.
 */
bool multifd_send_prepare_common(MultiFDSendParams *p)
{
    multifd_send_zero_page_detect(p);

    if (p->pages->normal_num) {
        multifd_send_prepare_header(p);
        return true;
    }

    p->next_packet_size = 0;
    return false;
}
1696