/*
 * Multifd RAM migration without compression
 *
 * Copyright (c) 2019-2020 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "exec/ramblock.h"
#include "exec/target_page.h"
#include "file.h"
#include "multifd.h"
#include "options.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "qemu-file.h"

static MultiFDSendData *multifd_ram_send;

size_t multifd_ram_payload_size(void)
{
    uint32_t n = multifd_ram_page_count();

    /*
     * We keep an array of page offsets at the end of MultiFDPages_t,
     * so add space for it in the allocation.
     */
    return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
}

void multifd_ram_save_setup(void)
{
    multifd_ram_send = multifd_send_data_alloc();
}

void multifd_ram_save_cleanup(void)
{
    g_free(multifd_ram_send);
    multifd_ram_send = NULL;
}

static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;

    assert(pages->block);

    for (int i = 0; i < pages->normal_num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
    }

    for (int i = pages->normal_num; i < pages->num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
    }
}

static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
{
    uint32_t page_count = multifd_ram_page_count();

    if (migrate_zero_copy_send()) {
        p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
    }

    if (!migrate_mapped_ram()) {
        /* We need one extra place for the packet header */
        p->iov = g_new0(struct iovec, page_count + 1);
    } else {
        p->iov = g_new0(struct iovec, page_count);
    }

    return 0;
}

static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
{
    g_free(p->iov);
    p->iov = NULL;
}

static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t page_size = multifd_ram_page_size();

    for (int i = 0; i < pages->normal_num; i++) {
        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
        p->iov[p->iovs_num].iov_len = page_size;
        p->iovs_num++;
    }

    p->next_packet_size = pages->normal_num * page_size;
}

static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    bool use_zero_copy_send = migrate_zero_copy_send();
    int ret;

    multifd_send_zero_page_detect(p);

    if (migrate_mapped_ram()) {
        multifd_send_prepare_iovs(p);
        multifd_set_file_bitmap(p);

        return 0;
    }

    if (!use_zero_copy_send) {
        /*
         * Only !zerocopy needs the header in the IOV; zerocopy will
         * send it separately.
         */
        multifd_send_prepare_header(p);
    }

    multifd_send_prepare_iovs(p);
    p->flags |= MULTIFD_FLAG_NOCOMP;

    multifd_send_fill_packet(p);

    if (use_zero_copy_send) {
        /* Send header first, without zerocopy */
        ret = qio_channel_write_all(p->c, (void *)p->packet,
                                    p->packet_len, errp);
        if (ret != 0) {
            return -1;
        }
    }

    return 0;
}
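/*
 * For the socket (!mapped-ram) path, the iov layout built above is
 * expected to be (a sketch, assuming multifd_send_prepare_header()
 * reserves iov[0] for the header):
 *
 *   iov[0]                packet header (p->packet, p->packet_len)
 *   iov[1..normal_num]    one guest page each, page_size bytes
 *
 * With zero-copy send the header is written out separately above, so
 * the iovs hold only the pages.
 */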
static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
{
    p->iov = g_new0(struct iovec, multifd_ram_page_count());
    return 0;
}

static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
{
    g_free(p->iov);
    p->iov = NULL;
}

static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
{
    uint32_t flags;

    if (migrate_mapped_ram()) {
        return multifd_file_recv_data(p, errp);
    }

    flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;

    if (flags != MULTIFD_FLAG_NOCOMP) {
        error_setg(errp, "multifd %u: flags received %x flags expected %x",
                   p->id, flags, MULTIFD_FLAG_NOCOMP);
        return -1;
    }

    multifd_recv_zero_page_process(p);

    if (!p->normal_num) {
        return 0;
    }

    for (int i = 0; i < p->normal_num; i++) {
        p->iov[i].iov_base = p->host + p->normal[i];
        p->iov[i].iov_len = multifd_ram_page_size();
        ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
    }
    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
}

static void multifd_pages_reset(MultiFDPages_t *pages)
{
    /*
     * We don't need to touch the offset[] array, because it will be
     * overwritten later when reused.
     */
    pages->num = 0;
    pages->normal_num = 0;
    pages->block = NULL;
}

void multifd_ram_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t zero_num = pages->num - pages->normal_num;

    packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
    packet->normal_pages = cpu_to_be32(pages->normal_num);
    packet->zero_pages = cpu_to_be32(zero_num);

    if (pages->block) {
        pstrcpy(packet->ramblock, sizeof(packet->ramblock),
                pages->block->idstr);
    }

    for (int i = 0; i < pages->num; i++) {
        /* there are architectures where ram_addr_t is 32 bit */
        uint64_t temp = pages->offset[i];

        packet->offset[i] = cpu_to_be64(temp);
    }

    trace_multifd_send_ram_fill(p->id, pages->normal_num, zero_num);
}

int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_count = multifd_ram_page_count();
    uint32_t page_size = multifd_ram_page_size();
    uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
    int i;

    if (pages_per_packet > page_count) {
        error_setg(errp, "multifd: received packet with %u pages, expected %u",
                   pages_per_packet, page_count);
        return -1;
    }

    p->normal_num = be32_to_cpu(packet->normal_pages);
    if (p->normal_num > pages_per_packet) {
        error_setg(errp, "multifd: received packet with %u non-zero pages, "
                   "which exceeds maximum expected pages %u",
                   p->normal_num, pages_per_packet);
        return -1;
    }

    p->zero_num = be32_to_cpu(packet->zero_pages);
    if (p->zero_num > pages_per_packet - p->normal_num) {
        error_setg(errp,
                   "multifd: received packet with %u zero pages, expected maximum %u",
                   p->zero_num, pages_per_packet - p->normal_num);
        return -1;
    }

    if (p->normal_num == 0 && p->zero_num == 0) {
        return 0;
    }

    /* make sure that the ramblock name is NUL-terminated */
    packet->ramblock[sizeof(packet->ramblock) - 1] = 0;
    p->block = qemu_ram_block_by_name(packet->ramblock);
    if (!p->block) {
        error_setg(errp, "multifd: unknown ram block %s",
                   packet->ramblock);
        return -1;
    }

    p->host = p->block->host;
    for (i = 0; i < p->normal_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->normal[i] = offset;
    }

    for (i = 0; i < p->zero_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->zero[i] = offset;
    }

    return 0;
}
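/*
 * A sketch of the RAM portion of the wire format, as filled in by
 * multifd_ram_fill_packet() above and parsed back by
 * multifd_ram_unfill_packet() (the full header layout lives in
 * MultiFDPacket_t):
 *
 *   pages_alloc  (be32)   number of pages the packet was sized for
 *   normal_pages (be32)   pages whose data follows on the channel
 *   zero_pages   (be32)   pages detected as all-zero; no data is sent
 *   ramblock     (char[]) NUL-terminated RAMBlock idstr
 *   offset[]     (be64)   normal page offsets first, then zero pages
 */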
static inline bool multifd_queue_empty(MultiFDPages_t *pages)
{
    return pages->num == 0;
}

static inline bool multifd_queue_full(MultiFDPages_t *pages)
{
    return pages->num == multifd_ram_page_count();
}

static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
{
    pages->offset[pages->num++] = offset;
}

/* Returns true if enqueue successful, false otherwise */
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages;

retry:
    pages = &multifd_ram_send->u.ram;

    if (multifd_payload_empty(multifd_ram_send)) {
        multifd_pages_reset(pages);
        multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
    }

    /* If the queue is empty, we can already enqueue now */
    if (multifd_queue_empty(pages)) {
        pages->block = block;
        multifd_enqueue(pages, offset);
        return true;
    }

    /*
     * Not empty, and we need a flush first. That can be because of
     * either:
     *
     * (1) The page is not on the same ramblock as the previous ones, or,
     * (2) The queue is full.
     *
     * After the flush, always retry.
     */
    if (pages->block != block || multifd_queue_full(pages)) {
        if (!multifd_send(&multifd_ram_send)) {
            return false;
        }
        goto retry;
    }

    /* Not empty, and we still have space, do it! */
    multifd_enqueue(pages, offset);
    return true;
}

/*
 * We have two modes for multifd flushes:
 *
 * - Per-section mode: this is the legacy way to flush, it requires one
 *   MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
 *
 * - Per-round mode: this is the modern way to flush, it requires one
 *   MULTIFD_FLAG_SYNC message only for each round of RAM scan.  Normally
 *   it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network
 *   based migrations.
 *
 * Note that mapped-ram always uses the modern way to sync.
 */

/* Do we need a per-section multifd flush (legacy way)? */
bool multifd_ram_sync_per_section(void)
{
    if (!migrate_multifd()) {
        return false;
    }

    if (migrate_mapped_ram()) {
        return false;
    }

    return migrate_multifd_flush_after_each_section();
}

/* Do we need a per-round multifd flush (modern way)? */
bool multifd_ram_sync_per_round(void)
{
    if (!migrate_multifd()) {
        return false;
    }

    if (migrate_mapped_ram()) {
        return true;
    }

    return !migrate_multifd_flush_after_each_section();
}
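/*
 * A sketch of how the sync helpers above pair with
 * multifd_ram_flush_and_sync() below on the RAM save path (the actual
 * call sites live in migration/ram.c):
 *
 *   if (multifd_ram_sync_per_round()) {
 *       ret = multifd_ram_flush_and_sync(f);
 *   }
 */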
int multifd_ram_flush_and_sync(QEMUFile *f)
{
    MultiFDSyncReq req;
    int ret;

    if (!migrate_multifd()) {
        return 0;
    }

    if (!multifd_payload_empty(multifd_ram_send)) {
        if (!multifd_send(&multifd_ram_send)) {
            error_report("%s: multifd_send failed", __func__);
            return -1;
        }
    }

    /* File migrations only need to sync with threads */
    req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;

    ret = multifd_send_sync_main(req);
    if (ret) {
        return ret;
    }

    /* If we don't need to sync with the remote at all, nothing else to do */
    if (req == MULTIFD_SYNC_LOCAL) {
        return 0;
    }

    /*
     * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, so the
     * stream relies on RAM_SAVE_FLAG_EOS instead.
     */
    if (migrate_multifd_flush_after_each_section()) {
        return 0;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
    qemu_fflush(f);

    return 0;
}

bool multifd_send_prepare_common(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;

    multifd_send_prepare_header(p);
    multifd_send_zero_page_detect(p);

    if (!pages->normal_num) {
        p->next_packet_size = 0;
        return false;
    }

    return true;
}

static const MultiFDMethods multifd_nocomp_ops = {
    .send_setup = multifd_nocomp_send_setup,
    .send_cleanup = multifd_nocomp_send_cleanup,
    .send_prepare = multifd_nocomp_send_prepare,
    .recv_setup = multifd_nocomp_recv_setup,
    .recv_cleanup = multifd_nocomp_recv_cleanup,
    .recv = multifd_nocomp_recv
};

static void multifd_nocomp_register(void)
{
    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
}

migration_init(multifd_nocomp_register);