1 /*
2 * Multifd RAM migration without compression
3 *
4 * Copyright (c) 2019-2020 Red Hat Inc
5 *
6 * Authors:
7 * Juan Quintela <quintela@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "exec/ramblock.h"
15 #include "exec/target_page.h"
16 #include "file.h"
17 #include "multifd.h"
18 #include "options.h"
19 #include "qapi/error.h"
20 #include "qemu/cutils.h"
21 #include "qemu/error-report.h"
22 #include "trace.h"
23
/*
 * Send-side staging payload for RAM pages; allocated in
 * multifd_ram_save_setup(), passed by address to multifd_send() when
 * flushing, and freed in multifd_ram_save_cleanup().
 */
static MultiFDSendData *multifd_ram_send;
25
multifd_ram_payload_size(void)26 size_t multifd_ram_payload_size(void)
27 {
28 uint32_t n = multifd_ram_page_count();
29
30 /*
31 * We keep an array of page offsets at the end of MultiFDPages_t,
32 * add space for it in the allocation.
33 */
34 return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
35 }
36
multifd_ram_save_setup(void)37 void multifd_ram_save_setup(void)
38 {
39 multifd_ram_send = multifd_send_data_alloc();
40 }
41
multifd_ram_save_cleanup(void)42 void multifd_ram_save_cleanup(void)
43 {
44 g_free(multifd_ram_send);
45 multifd_ram_send = NULL;
46 }
47
/*
 * Update the ramblock's file bitmap for every page in the current
 * payload: entries below normal_num are non-zero pages (bit set),
 * the remainder up to num are zero pages (bit cleared).
 */
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    int i;

    assert(pages->block);

    for (i = 0; i < pages->num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i],
                                      i < pages->normal_num);
    }
}
62
multifd_nocomp_send_setup(MultiFDSendParams * p,Error ** errp)63 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
64 {
65 uint32_t page_count = multifd_ram_page_count();
66
67 if (migrate_zero_copy_send()) {
68 p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
69 }
70
71 if (!migrate_mapped_ram()) {
72 /* We need one extra place for the packet header */
73 p->iov = g_new0(struct iovec, page_count + 1);
74 } else {
75 p->iov = g_new0(struct iovec, page_count);
76 }
77
78 return 0;
79 }
80
multifd_nocomp_send_cleanup(MultiFDSendParams * p,Error ** errp)81 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
82 {
83 g_free(p->iov);
84 p->iov = NULL;
85 return;
86 }
87
/*
 * Append one iovec per non-zero page to p->iov (zero pages are not
 * transferred) and record the resulting payload size in
 * p->next_packet_size.
 */
static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t page_size = multifd_ram_page_size();
    struct iovec *iov = &p->iov[p->iovs_num];
    int i;

    for (i = 0; i < pages->normal_num; i++) {
        iov[i].iov_base = pages->block->host + pages->offset[i];
        iov[i].iov_len = page_size;
    }
    p->iovs_num += pages->normal_num;

    p->next_packet_size = pages->normal_num * page_size;
}
101
multifd_nocomp_send_prepare(MultiFDSendParams * p,Error ** errp)102 static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
103 {
104 bool use_zero_copy_send = migrate_zero_copy_send();
105 int ret;
106
107 multifd_send_zero_page_detect(p);
108
109 if (migrate_mapped_ram()) {
110 multifd_send_prepare_iovs(p);
111 multifd_set_file_bitmap(p);
112
113 return 0;
114 }
115
116 if (!use_zero_copy_send) {
117 /*
118 * Only !zerocopy needs the header in IOV; zerocopy will
119 * send it separately.
120 */
121 multifd_send_prepare_header(p);
122 }
123
124 multifd_send_prepare_iovs(p);
125 p->flags |= MULTIFD_FLAG_NOCOMP;
126
127 multifd_send_fill_packet(p);
128
129 if (use_zero_copy_send) {
130 /* Send header first, without zerocopy */
131 ret = qio_channel_write_all(p->c, (void *)p->packet,
132 p->packet_len, errp);
133 if (ret != 0) {
134 return -1;
135 }
136 }
137
138 return 0;
139 }
140
multifd_nocomp_recv_setup(MultiFDRecvParams * p,Error ** errp)141 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
142 {
143 p->iov = g_new0(struct iovec, multifd_ram_page_count());
144 return 0;
145 }
146
/* Per-channel receive teardown: release the iovec array. */
static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
{
    g_free(p->iov);

    p->iov = NULL;
}
152
multifd_nocomp_recv(MultiFDRecvParams * p,Error ** errp)153 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
154 {
155 uint32_t flags;
156
157 if (migrate_mapped_ram()) {
158 return multifd_file_recv_data(p, errp);
159 }
160
161 flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
162
163 if (flags != MULTIFD_FLAG_NOCOMP) {
164 error_setg(errp, "multifd %u: flags received %x flags expected %x",
165 p->id, flags, MULTIFD_FLAG_NOCOMP);
166 return -1;
167 }
168
169 multifd_recv_zero_page_process(p);
170
171 if (!p->normal_num) {
172 return 0;
173 }
174
175 for (int i = 0; i < p->normal_num; i++) {
176 p->iov[i].iov_base = p->host + p->normal[i];
177 p->iov[i].iov_len = multifd_ram_page_size();
178 ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
179 }
180 return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
181 }
182
/*
 * Reset the payload to an empty state.  The offset[] array is left
 * untouched on purpose: its entries are always rewritten before being
 * read again on the next use.
 */
static void multifd_pages_reset(MultiFDPages_t *pages)
{
    pages->block = NULL;
    pages->normal_num = 0;
    pages->num = 0;
}
193
/*
 * Serialize the current payload into the wire packet: page counts,
 * ramblock name, and big-endian page offsets (non-zero pages first,
 * zero pages after).
 */
void multifd_ram_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t zero_num = pages->num - pages->normal_num;
    int i;

    packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
    packet->normal_pages = cpu_to_be32(pages->normal_num);
    packet->zero_pages = cpu_to_be32(zero_num);

    if (pages->block) {
        pstrcpy(packet->ramblock, sizeof(packet->ramblock),
                pages->block->idstr);
    }

    for (i = 0; i < pages->num; i++) {
        /* widen first: there are architectures where ram_addr_t is 32 bit */
        uint64_t off = pages->offset[i];

        packet->offset[i] = cpu_to_be64(off);
    }

    trace_multifd_send_ram_fill(p->id, pages->normal_num, zero_num);
}
219
multifd_ram_unfill_packet(MultiFDRecvParams * p,Error ** errp)220 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
221 {
222 MultiFDPacket_t *packet = p->packet;
223 uint32_t page_count = multifd_ram_page_count();
224 uint32_t page_size = multifd_ram_page_size();
225 uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
226 int i;
227
228 if (pages_per_packet > page_count) {
229 error_setg(errp, "multifd: received packet with %u pages, expected %u",
230 pages_per_packet, page_count);
231 return -1;
232 }
233
234 p->normal_num = be32_to_cpu(packet->normal_pages);
235 if (p->normal_num > pages_per_packet) {
236 error_setg(errp, "multifd: received packet with %u non-zero pages, "
237 "which exceeds maximum expected pages %u",
238 p->normal_num, pages_per_packet);
239 return -1;
240 }
241
242 p->zero_num = be32_to_cpu(packet->zero_pages);
243 if (p->zero_num > pages_per_packet - p->normal_num) {
244 error_setg(errp,
245 "multifd: received packet with %u zero pages, expected maximum %u",
246 p->zero_num, pages_per_packet - p->normal_num);
247 return -1;
248 }
249
250 if (p->normal_num == 0 && p->zero_num == 0) {
251 return 0;
252 }
253
254 /* make sure that ramblock is 0 terminated */
255 packet->ramblock[255] = 0;
256 p->block = qemu_ram_block_by_name(packet->ramblock);
257 if (!p->block) {
258 error_setg(errp, "multifd: unknown ram block %s",
259 packet->ramblock);
260 return -1;
261 }
262
263 p->host = p->block->host;
264 for (i = 0; i < p->normal_num; i++) {
265 uint64_t offset = be64_to_cpu(packet->offset[i]);
266
267 if (offset > (p->block->used_length - page_size)) {
268 error_setg(errp, "multifd: offset too long %" PRIu64
269 " (max " RAM_ADDR_FMT ")",
270 offset, p->block->used_length);
271 return -1;
272 }
273 p->normal[i] = offset;
274 }
275
276 for (i = 0; i < p->zero_num; i++) {
277 uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
278
279 if (offset > (p->block->used_length - page_size)) {
280 error_setg(errp, "multifd: offset too long %" PRIu64
281 " (max " RAM_ADDR_FMT ")",
282 offset, p->block->used_length);
283 return -1;
284 }
285 p->zero[i] = offset;
286 }
287
288 return 0;
289 }
290
/* True when no pages have been queued since the last reset. */
static inline bool multifd_queue_empty(MultiFDPages_t *pages)
{
    return !pages->num;
}
295
/* True when the payload holds the maximum number of pages per packet. */
static inline bool multifd_queue_full(MultiFDPages_t *pages)
{
    return multifd_ram_page_count() == pages->num;
}
300
/* Record one page offset; the caller must ensure there is room. */
static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
{
    pages->offset[pages->num] = offset;
    pages->num++;
}
305
306 /* Returns true if enqueue successful, false otherwise */
multifd_queue_page(RAMBlock * block,ram_addr_t offset)307 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
308 {
309 MultiFDPages_t *pages;
310
311 retry:
312 pages = &multifd_ram_send->u.ram;
313
314 if (multifd_payload_empty(multifd_ram_send)) {
315 multifd_pages_reset(pages);
316 multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
317 }
318
319 /* If the queue is empty, we can already enqueue now */
320 if (multifd_queue_empty(pages)) {
321 pages->block = block;
322 multifd_enqueue(pages, offset);
323 return true;
324 }
325
326 /*
327 * Not empty, meanwhile we need a flush. It can because of either:
328 *
329 * (1) The page is not on the same ramblock of previous ones, or,
330 * (2) The queue is full.
331 *
332 * After flush, always retry.
333 */
334 if (pages->block != block || multifd_queue_full(pages)) {
335 if (!multifd_send(&multifd_ram_send)) {
336 return false;
337 }
338 goto retry;
339 }
340
341 /* Not empty, and we still have space, do it! */
342 multifd_enqueue(pages, offset);
343 return true;
344 }
345
multifd_ram_flush_and_sync(void)346 int multifd_ram_flush_and_sync(void)
347 {
348 if (!migrate_multifd()) {
349 return 0;
350 }
351
352 if (!multifd_payload_empty(multifd_ram_send)) {
353 if (!multifd_send(&multifd_ram_send)) {
354 error_report("%s: multifd_send fail", __func__);
355 return -1;
356 }
357 }
358
359 return multifd_send_sync_main();
360 }
361
/*
 * Shared first phase of send_prepare used by compression variants:
 * run zero-page detection and prepare the packet header.  Returns
 * false when every page turned out zero and there is no payload to
 * compress or send.
 */
bool multifd_send_prepare_common(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;

    multifd_send_zero_page_detect(p);

    if (pages->normal_num == 0) {
        p->next_packet_size = 0;
        return false;
    }

    multifd_send_prepare_header(p);
    return true;
}
376
/* Method table for the no-compression multifd variant. */
static const MultiFDMethods multifd_nocomp_ops = {
    .send_setup = multifd_nocomp_send_setup,
    .send_cleanup = multifd_nocomp_send_cleanup,
    .send_prepare = multifd_nocomp_send_prepare,
    .recv_setup = multifd_nocomp_recv_setup,
    .recv_cleanup = multifd_nocomp_recv_cleanup,
    .recv = multifd_nocomp_recv
};
385
/* Install the no-compression ops for MULTIFD_COMPRESSION_NONE. */
static void multifd_nocomp_register(void)
{
    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
}

/* NOTE(review): presumably migration_init() hooks this into the module
 * init framework so registration runs at startup — confirm in multifd.h. */
migration_init(multifd_nocomp_register);