1 /*
2 * Multifd RAM migration without compression
3 *
4 * Copyright (c) 2019-2020 Red Hat Inc
5 *
6 * Authors:
7 * Juan Quintela <quintela@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "system/ramblock.h"
15 #include "exec/target_page.h"
16 #include "file.h"
17 #include "migration-stats.h"
18 #include "multifd.h"
19 #include "options.h"
20 #include "migration.h"
21 #include "qapi/error.h"
22 #include "qemu/cutils.h"
23 #include "qemu/error-report.h"
24 #include "trace.h"
25 #include "qemu-file.h"
26
/* Scratch send payload, reused across sends; allocated in setup, freed in cleanup */
static MultiFDSendData *multifd_ram_send;
28
/*
 * Allocate the per-packet page-offsets array of a RAM payload, sized
 * for one entry per page of a full multifd packet.
 */
void multifd_ram_payload_alloc(MultiFDPages_t *pages)
{
    uint32_t page_count = multifd_ram_page_count();

    pages->offset = g_new0(ram_addr_t, page_count);
}
33
/* Release the offsets array allocated by multifd_ram_payload_alloc(). */
void multifd_ram_payload_free(MultiFDPages_t *pages)
{
    g_free(pages->offset);
    pages->offset = NULL;
}
38
/* Allocate the shared send payload used by multifd_queue_page(). */
void multifd_ram_save_setup(void)
{
    multifd_ram_send = multifd_send_data_alloc();
}
43
multifd_ram_save_cleanup(void)44 void multifd_ram_save_cleanup(void)
45 {
46 g_clear_pointer(&multifd_ram_send, multifd_send_data_free);
47 }
48
/*
 * Record in the ramblock's file bitmap which pages of this packet carry
 * data (the first normal_num entries) and which were detected as zero
 * (the remainder).
 */
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    int i;

    assert(pages->block);

    for (i = 0; i < pages->num; i++) {
        /* offsets are sorted: normal pages first, then zero pages */
        bool is_normal = i < pages->normal_num;

        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i],
                                      is_normal);
    }
}
63
multifd_nocomp_send_setup(MultiFDSendParams * p,Error ** errp)64 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
65 {
66 uint32_t page_count = multifd_ram_page_count();
67
68 if (migrate_zero_copy_send()) {
69 p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
70 }
71
72 if (!migrate_mapped_ram()) {
73 /* We need one extra place for the packet header */
74 p->iov = g_new0(struct iovec, page_count + 1);
75 } else {
76 p->iov = g_new0(struct iovec, page_count);
77 }
78
79 return 0;
80 }
81
multifd_nocomp_send_cleanup(MultiFDSendParams * p,Error ** errp)82 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
83 {
84 g_free(p->iov);
85 p->iov = NULL;
86 }
87
/* Reserve iov[0] for the packet header and account for it in iovs_num. */
static void multifd_ram_prepare_header(MultiFDSendParams *p)
{
    p->iov[0].iov_base = p->packet;
    p->iov[0].iov_len = p->packet_len;
    p->iovs_num++;
}
94
/*
 * Append one iovec per normal (non-zero) page and record the resulting
 * data size in next_packet_size.
 */
static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t page_size = multifd_ram_page_size();
    int i;

    for (i = 0; i < pages->normal_num; i++) {
        struct iovec *iov = &p->iov[p->iovs_num++];

        iov->iov_base = pages->block->host + pages->offset[i];
        iov->iov_len = page_size;
    }

    p->next_packet_size = pages->normal_num * page_size;
}
108
/*
 * Prepare one outgoing packet on the no-compression path: detect zero
 * pages, build the iovec list and, depending on the transport, either
 * update the file bitmap (mapped-ram) or fill and possibly pre-send the
 * packet header (socket, zerocopy).
 *
 * Returns 0 on success, -1 on error with @errp set.
 */
static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    bool use_zero_copy_send = migrate_zero_copy_send();
    int ret;

    /* Partition the queued pages into normal and zero pages */
    multifd_send_zero_page_detect(p);

    if (migrate_mapped_ram()) {
        /*
         * File migration has no packet header; page state is recorded
         * in the ramblock's file bitmap instead.
         */
        multifd_send_prepare_iovs(p);
        multifd_set_file_bitmap(p);

        return 0;
    }

    if (!use_zero_copy_send) {
        /*
         * Only !zerocopy needs the header in IOV; zerocopy will
         * send it separately.
         */
        multifd_ram_prepare_header(p);
    }

    multifd_send_prepare_iovs(p);
    p->flags |= MULTIFD_FLAG_NOCOMP;

    multifd_send_fill_packet(p);

    if (use_zero_copy_send) {
        /* Send header first, without zerocopy */
        ret = qio_channel_write_all(p->c, (void *)p->packet,
                                    p->packet_len, errp);
        if (ret != 0) {
            return -1;
        }

        stat64_add(&mig_stats.multifd_bytes, p->packet_len);
    }

    return 0;
}
149
multifd_nocomp_recv_setup(MultiFDRecvParams * p,Error ** errp)150 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
151 {
152 p->iov = g_new0(struct iovec, multifd_ram_page_count());
153 return 0;
154 }
155
/* Per-channel recv teardown: release the iovec array. */
static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
{
    g_clear_pointer(&p->iov, g_free);
}
161
multifd_nocomp_recv(MultiFDRecvParams * p,Error ** errp)162 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
163 {
164 uint32_t flags;
165
166 if (migrate_mapped_ram()) {
167 return multifd_file_recv_data(p, errp);
168 }
169
170 flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
171
172 if (flags != MULTIFD_FLAG_NOCOMP) {
173 error_setg(errp, "multifd %u: flags received %x flags expected %x",
174 p->id, flags, MULTIFD_FLAG_NOCOMP);
175 return -1;
176 }
177
178 multifd_recv_zero_page_process(p);
179
180 if (!p->normal_num) {
181 return 0;
182 }
183
184 for (int i = 0; i < p->normal_num; i++) {
185 p->iov[i].iov_base = p->host + p->normal[i];
186 p->iov[i].iov_len = multifd_ram_page_size();
187 ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
188 }
189 return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
190 }
191
/* Reset a pages payload for reuse. */
static void multifd_pages_reset(MultiFDPages_t *pages)
{
    /*
     * The offset[] array is deliberately left untouched: its entries
     * are overwritten before being read on the next use.
     */
    pages->block = NULL;
    pages->normal_num = 0;
    pages->num = 0;
}
202
/*
 * Serialize the RAM payload into the on-wire packet: page counts, the
 * source ramblock name, and the page offsets, all in big-endian.
 */
void multifd_ram_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t zero_num = pages->num - pages->normal_num;
    int i;

    packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
    packet->normal_pages = cpu_to_be32(pages->normal_num);
    packet->zero_pages = cpu_to_be32(zero_num);

    if (pages->block) {
        pstrcpy(packet->ramblock, sizeof(packet->ramblock),
                pages->block->idstr);
    }

    for (i = 0; i < pages->num; i++) {
        /* widen first: ram_addr_t is only 32 bit on some architectures */
        uint64_t off = pages->offset[i];

        packet->offset[i] = cpu_to_be64(off);
    }

    trace_multifd_send_ram_fill(p->id, pages->normal_num, zero_num);
}
228
/*
 * Deserialize and validate an incoming packet header: check the page
 * counts against our packet capacity, resolve the ramblock by name, and
 * range-check every page offset before recording it in p->normal[] /
 * p->zero[].
 *
 * Returns 0 on success, -1 on malformed input with @errp set.
 */
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_count = multifd_ram_page_count();
    uint32_t page_size = multifd_ram_page_size();
    uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
    int i;

    /* The sender's packet capacity must not exceed ours */
    if (pages_per_packet > page_count) {
        error_setg(errp, "multifd: received packet with %u pages, expected %u",
                   pages_per_packet, page_count);
        return -1;
    }

    p->normal_num = be32_to_cpu(packet->normal_pages);
    if (p->normal_num > pages_per_packet) {
        error_setg(errp, "multifd: received packet with %u non-zero pages, "
                   "which exceeds maximum expected pages %u",
                   p->normal_num, pages_per_packet);
        return -1;
    }

    /* normal + zero pages together must fit in the packet */
    p->zero_num = be32_to_cpu(packet->zero_pages);
    if (p->zero_num > pages_per_packet - p->normal_num) {
        error_setg(errp,
                   "multifd: received packet with %u zero pages, expected maximum %u",
                   p->zero_num, pages_per_packet - p->normal_num);
        return -1;
    }

    if (p->normal_num == 0 && p->zero_num == 0) {
        return 0;
    }

    /* make sure that ramblock is 0 terminated */
    packet->ramblock[255] = 0;
    p->block = qemu_ram_block_by_name(packet->ramblock);
    if (!p->block) {
        error_setg(errp, "multifd: unknown ram block %s",
                   packet->ramblock);
        return -1;
    }

    p->host = p->block->host;
    /* Normal pages: offsets are stored first in packet->offset[] */
    for (i = 0; i < p->normal_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

        /* Reject offsets whose page would overrun the ramblock */
        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->normal[i] = offset;
    }

    /* Zero pages follow the normal pages in packet->offset[] */
    for (i = 0; i < p->zero_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->zero[i] = offset;
    }

    return 0;
}
299
multifd_queue_empty(MultiFDPages_t * pages)300 static inline bool multifd_queue_empty(MultiFDPages_t *pages)
301 {
302 return pages->num == 0;
303 }
304
multifd_queue_full(MultiFDPages_t * pages)305 static inline bool multifd_queue_full(MultiFDPages_t *pages)
306 {
307 return pages->num == multifd_ram_page_count();
308 }
309
/* Append one page offset to the payload. */
static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
{
    pages->offset[pages->num] = offset;
    pages->num++;
}
314
/* Returns true if enqueue successful, false otherwise */
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages;

retry:
    /*
     * Re-read after every retry: multifd_send() swaps the payload
     * pointer with a free one from the channel.
     */
    pages = &multifd_ram_send->u.ram;

    /* First page after a send: reinitialize the payload */
    if (multifd_payload_empty(multifd_ram_send)) {
        multifd_pages_reset(pages);
        multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
    }

    /* If the queue is empty, we can already enqueue now */
    if (multifd_queue_empty(pages)) {
        pages->block = block;
        multifd_enqueue(pages, offset);
        return true;
    }

    /*
     * Not empty, meanwhile we need a flush.  It can be because of either:
     *
     * (1) The page is not on the same ramblock of previous ones, or,
     * (2) The queue is full.
     *
     * After flush, always retry.
     */
    if (pages->block != block || multifd_queue_full(pages)) {
        if (!multifd_send(&multifd_ram_send)) {
            return false;
        }
        goto retry;
    }

    /* Not empty, and we still have space, do it! */
    multifd_enqueue(pages, offset);
    return true;
}
354
355 /*
356 * We have two modes for multifd flushes:
357 *
358 * - Per-section mode: this is the legacy way to flush, it requires one
359 * MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
360 *
361 * - Per-round mode: this is the modern way to flush, it requires one
362 * MULTIFD_FLAG_SYNC message only for each round of RAM scan. Normally
363 * it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network
364 * based migrations.
365 *
 * One thing to mention is that mapped-ram always uses the modern way to sync.
367 */
368
369 /* Do we need a per-section multifd flush (legacy way)? */
multifd_ram_sync_per_section(void)370 bool multifd_ram_sync_per_section(void)
371 {
372 if (!migrate_multifd()) {
373 return false;
374 }
375
376 if (migrate_mapped_ram()) {
377 return false;
378 }
379
380 return migrate_multifd_flush_after_each_section();
381 }
382
383 /* Do we need a per-round multifd flush (modern way)? */
multifd_ram_sync_per_round(void)384 bool multifd_ram_sync_per_round(void)
385 {
386 if (!migrate_multifd()) {
387 return false;
388 }
389
390 if (migrate_mapped_ram()) {
391 return true;
392 }
393
394 return !migrate_multifd_flush_after_each_section();
395 }
396
/*
 * Push out any partially-filled payload, then sync the multifd send
 * threads and, for network migrations, notify the destination with
 * either RAM_SAVE_FLAG_MULTIFD_FLUSH or (for old QEMUs) by relying on
 * RAM_SAVE_FLAG_EOS emitted elsewhere.
 *
 * Returns 0 on success, negative on error.
 */
int multifd_ram_flush_and_sync(QEMUFile *f)
{
    MultiFDSyncReq req;
    int ret;

    /* Postcopy does not use multifd, nothing to flush */
    if (!migrate_multifd() || migration_in_postcopy()) {
        return 0;
    }

    /* Send out any pages still queued in the shared payload */
    if (!multifd_payload_empty(multifd_ram_send)) {
        if (!multifd_send(&multifd_ram_send)) {
            error_report("%s: multifd_send fail", __func__);
            return -1;
        }
    }

    /* File migrations only need to sync with threads */
    req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;

    ret = multifd_send_sync_main(req);
    if (ret) {
        return ret;
    }

    /* If we don't need to sync with remote at all, nothing else to do */
    if (req == MULTIFD_SYNC_LOCAL) {
        return 0;
    }

    /*
     * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies
     * on RAM_SAVE_FLAG_EOS instead.
     */
    if (migrate_multifd_flush_after_each_section()) {
        return 0;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
    qemu_fflush(f);

    return 0;
}
439
multifd_send_prepare_common(MultiFDSendParams * p)440 bool multifd_send_prepare_common(MultiFDSendParams *p)
441 {
442 MultiFDPages_t *pages = &p->data->u.ram;
443 multifd_ram_prepare_header(p);
444 multifd_send_zero_page_detect(p);
445
446 if (!pages->normal_num) {
447 p->next_packet_size = 0;
448 return false;
449 }
450
451 return true;
452 }
453
/* Method table for the no-compression multifd path */
static const MultiFDMethods multifd_nocomp_ops = {
    .send_setup = multifd_nocomp_send_setup,
    .send_cleanup = multifd_nocomp_send_cleanup,
    .send_prepare = multifd_nocomp_send_prepare,
    .recv_setup = multifd_nocomp_recv_setup,
    .recv_cleanup = multifd_nocomp_recv_cleanup,
    .recv = multifd_nocomp_recv
};
462
/* Register the no-compression backend at startup (via migration_init). */
static void multifd_nocomp_register(void)
{
    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
}

migration_init(multifd_nocomp_register);
469