/*
 * Multifd RAM migration without compression
 *
 * Copyright (c) 2019-2020 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "system/ramblock.h"
#include "exec/target_page.h"
#include "file.h"
#include "migration-stats.h"
#include "multifd.h"
#include "options.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "qemu-file.h"

static MultiFDSendData *multifd_ram_send;

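/*
 * Allocate the array of page offsets carried by one multifd packet:
 * multifd_ram_page_count() slots, one per page.
 */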
void multifd_ram_payload_alloc(MultiFDPages_t *pages)
{
    pages->offset = g_new0(ram_addr_t, multifd_ram_page_count());
}

void multifd_ram_payload_free(MultiFDPages_t *pages)
{
    g_clear_pointer(&pages->offset, g_free);
}

void multifd_ram_save_setup(void)
{
    multifd_ram_send = multifd_send_data_alloc();
}

void multifd_ram_save_cleanup(void)
{
    g_clear_pointer(&multifd_ram_send, multifd_send_data_free);
}

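/*
 * Mapped-ram only: mark this packet's non-zero pages as set and its
 * zero pages as clear in the ramblock's file bitmap.
 */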
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;

    assert(pages->block);

    for (int i = 0; i < pages->normal_num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
    }

    for (int i = pages->normal_num; i < pages->num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
    }
}

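/*
 * Per-channel setup: request zero-copy writes when enabled and size the
 * iovec array.  Socket migrations need one extra iovec for the packet
 * header; mapped-ram does not include a header in the iovec array.
 */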
static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
{
    uint32_t page_count = multifd_ram_page_count();

    if (migrate_zero_copy_send()) {
        p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
    }

    if (!migrate_mapped_ram()) {
        /* We need one extra slot for the packet header */
        p->iov = g_new0(struct iovec, page_count + 1);
    } else {
        p->iov = g_new0(struct iovec, page_count);
    }

    return 0;
}

static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
{
    g_free(p->iov);
    p->iov = NULL;
}

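/* Put the packet header in iov[0]; the page iovecs will follow it. */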
static void multifd_ram_prepare_header(MultiFDSendParams *p)
{
    p->iov[0].iov_len = p->packet_len;
    p->iov[0].iov_base = p->packet;
    p->iovs_num++;
}

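/*
 * Add one iovec per non-zero page and record the resulting payload
 * size.  Zero pages are only listed in the header, never transmitted.
 */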
static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t page_size = multifd_ram_page_size();

    for (int i = 0; i < pages->normal_num; i++) {
        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
        p->iov[p->iovs_num].iov_len = page_size;
        p->iovs_num++;
    }

    p->next_packet_size = pages->normal_num * page_size;
}

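/*
 * Build an outgoing packet: detect zero pages, set up the iovecs and
 * fill the header.  With zero-copy enabled, the header is written here
 * first as a separate, non-zero-copy send; the page payload goes out
 * later through the usual (zero-copy) write path.
 */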
static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    bool use_zero_copy_send = migrate_zero_copy_send();
    int ret;

    multifd_send_zero_page_detect(p);

    if (migrate_mapped_ram()) {
        multifd_send_prepare_iovs(p);
        multifd_set_file_bitmap(p);

        return 0;
    }

    if (!use_zero_copy_send) {
        /*
         * Only !zerocopy needs the header in IOV; zerocopy will
         * send it separately.
         */
        multifd_ram_prepare_header(p);
    }

    multifd_send_prepare_iovs(p);
    p->flags |= MULTIFD_FLAG_NOCOMP;

    multifd_send_fill_packet(p);

    if (use_zero_copy_send) {
        /* Send header first, without zerocopy */
        ret = qio_channel_write_all(p->c, (void *)p->packet,
                                    p->packet_len, errp);
        if (ret != 0) {
            return -1;
        }

        stat64_add(&mig_stats.multifd_bytes, p->packet_len);
    }

    return 0;
}

static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
{
    p->iov = g_new0(struct iovec, multifd_ram_page_count());
    return 0;
}

static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
{
    g_free(p->iov);
    p->iov = NULL;
}

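/*
 * Receive one packet's worth of pages.  Mapped-ram reads the data from
 * the file; otherwise, verify the compression flag, fill the zero pages
 * locally and read the non-zero pages directly into guest memory.
 */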
static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
{
    uint32_t flags;

    if (migrate_mapped_ram()) {
        return multifd_file_recv_data(p, errp);
    }

    flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;

    if (flags != MULTIFD_FLAG_NOCOMP) {
        error_setg(errp, "multifd %u: flags received %x flags expected %x",
                   p->id, flags, MULTIFD_FLAG_NOCOMP);
        return -1;
    }

    multifd_recv_zero_page_process(p);

    if (!p->normal_num) {
        return 0;
    }

    for (int i = 0; i < p->normal_num; i++) {
        p->iov[i].iov_base = p->host + p->normal[i];
        p->iov[i].iov_len = multifd_ram_page_size();
        ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
    }
    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
}

static void multifd_pages_reset(MultiFDPages_t *pages)
{
    /*
     * We don't need to touch the offset[] array, because it will be
     * overwritten later when reused.
     */
    pages->num = 0;
    pages->normal_num = 0;
    pages->block = NULL;
}

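/*
 * Fill the wire header: page counts, the ramblock name, and the
 * big-endian offset of every page (non-zero pages first, then zero
 * pages).
 */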
void multifd_ram_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t zero_num = pages->num - pages->normal_num;

    packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
    packet->normal_pages = cpu_to_be32(pages->normal_num);
    packet->zero_pages = cpu_to_be32(zero_num);

    if (pages->block) {
        pstrcpy(packet->ramblock, sizeof(packet->ramblock),
                pages->block->idstr);
    }

    for (int i = 0; i < pages->num; i++) {
        /* there are architectures where ram_addr_t is 32 bits */
        uint64_t temp = pages->offset[i];

        packet->offset[i] = cpu_to_be64(temp);
    }

    trace_multifd_send_ram_fill(p->id, pages->normal_num,
                                zero_num);
}

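/*
 * Parse and validate a received header: the page counts must fit in a
 * packet, the ramblock must be known, and every offset must fall inside
 * the block's used length.
 */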
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_count = multifd_ram_page_count();
    uint32_t page_size = multifd_ram_page_size();
    uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
    int i;

    if (pages_per_packet > page_count) {
        error_setg(errp, "multifd: received packet with %u pages, expected %u",
                   pages_per_packet, page_count);
        return -1;
    }

    p->normal_num = be32_to_cpu(packet->normal_pages);
    if (p->normal_num > pages_per_packet) {
        error_setg(errp, "multifd: received packet with %u non-zero pages, "
                   "which exceeds maximum expected pages %u",
                   p->normal_num, pages_per_packet);
        return -1;
    }

    p->zero_num = be32_to_cpu(packet->zero_pages);
    if (p->zero_num > pages_per_packet - p->normal_num) {
        error_setg(errp,
                   "multifd: received packet with %u zero pages, expected maximum %u",
                   p->zero_num, pages_per_packet - p->normal_num);
        return -1;
    }

    if (p->normal_num == 0 && p->zero_num == 0) {
        return 0;
    }

    /* make sure the ramblock name is NUL-terminated */
    packet->ramblock[255] = 0;
    p->block = qemu_ram_block_by_name(packet->ramblock);
    if (!p->block) {
        error_setg(errp, "multifd: unknown ram block %s",
                   packet->ramblock);
        return -1;
    }

    p->host = p->block->host;
    for (i = 0; i < p->normal_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->normal[i] = offset;
    }

    for (i = 0; i < p->zero_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->zero[i] = offset;
    }

    return 0;
}

static inline bool multifd_queue_empty(MultiFDPages_t *pages)
{
    return pages->num == 0;
}

static inline bool multifd_queue_full(MultiFDPages_t *pages)
{
    return pages->num == multifd_ram_page_count();
}

static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
{
    pages->offset[pages->num++] = offset;
}

/* Returns true if the enqueue was successful, false otherwise */
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages;

retry:
    pages = &multifd_ram_send->u.ram;

    if (multifd_payload_empty(multifd_ram_send)) {
        multifd_pages_reset(pages);
        multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
    }

    /* If the queue is empty, we can enqueue right away */
    if (multifd_queue_empty(pages)) {
        pages->block = block;
        multifd_enqueue(pages, offset);
        return true;
    }

    /*
     * Not empty, so we may need a flush first.  That is the case when
     * either:
     *
     * (1) The page is not on the same ramblock as the previous ones, or
     * (2) The queue is full.
     *
     * After the flush, always retry.
     */
    if (pages->block != block || multifd_queue_full(pages)) {
        if (!multifd_send(&multifd_ram_send)) {
            return false;
        }
        goto retry;
    }

    /* Not empty, and we still have space, do it! */
    multifd_enqueue(pages, offset);
    return true;
}

/*
 * We have two modes for multifd flushes:
 *
 * - Per-section mode: this is the legacy way to flush; it requires one
 *   MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
 *
 * - Per-round mode: this is the modern way to flush; it requires one
 *   MULTIFD_FLAG_SYNC message only for each round of RAM scan.  Normally
 *   it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in
 *   network-based migrations.
 *
 * Note that mapped-ram always uses the modern way to sync.
 */

/* Do we need a per-section multifd flush (legacy way)? */
bool multifd_ram_sync_per_section(void)
{
    if (!migrate_multifd()) {
        return false;
    }

    if (migrate_mapped_ram()) {
        return false;
    }

    return migrate_multifd_flush_after_each_section();
}

/* Do we need a per-round multifd flush (modern way)? */
bool multifd_ram_sync_per_round(void)
{
    if (!migrate_multifd()) {
        return false;
    }

    if (migrate_mapped_ram()) {
        return true;
    }

    return !migrate_multifd_flush_after_each_section();
}

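/*
 * Push any queued pages out to the channels, then sync the send threads
 * (and, for socket migrations, the destination).  In per-round mode
 * this also emits RAM_SAVE_FLAG_MULTIFD_FLUSH on the main stream so the
 * destination knows when to sync its receive threads.
 */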
int multifd_ram_flush_and_sync(QEMUFile *f)
{
    MultiFDSyncReq req;
    int ret;

    if (!migrate_multifd()) {
        return 0;
    }

    if (!multifd_payload_empty(multifd_ram_send)) {
        if (!multifd_send(&multifd_ram_send)) {
            error_report("%s: multifd_send fail", __func__);
            return -1;
        }
    }

    /* File migrations only need to sync with threads */
    req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;

    ret = multifd_send_sync_main(req);
    if (ret) {
        return ret;
    }

    /* If we don't need to sync with the remote at all, nothing else to do */
    if (req == MULTIFD_SYNC_LOCAL) {
        return 0;
    }

    /*
     * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH; they rely
     * on RAM_SAVE_FLAG_EOS instead.
     */
    if (migrate_multifd_flush_after_each_section()) {
        return 0;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
    qemu_fflush(f);

    return 0;
}

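/*
 * Shared helper for the send_prepare hooks of the compression backends:
 * prepare the header iovec and run zero-page detection.  Returns false
 * when every page in the packet turned out to be zero, i.e. there is
 * nothing left to compress.
 */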
bool multifd_send_prepare_common(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;

    multifd_ram_prepare_header(p);
    multifd_send_zero_page_detect(p);

    if (!pages->normal_num) {
        p->next_packet_size = 0;
        return false;
    }

    return true;
}

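/* The method table registered for MULTIFD_COMPRESSION_NONE. */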
static const MultiFDMethods multifd_nocomp_ops = {
    .send_setup = multifd_nocomp_send_setup,
    .send_cleanup = multifd_nocomp_send_cleanup,
    .send_prepare = multifd_nocomp_send_prepare,
    .recv_setup = multifd_nocomp_recv_setup,
    .recv_cleanup = multifd_nocomp_recv_cleanup,
    .recv = multifd_nocomp_recv
};

static void multifd_nocomp_register(void)
{
    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
}

migration_init(multifd_nocomp_register);