1 /* 2 * Multifd RAM migration without compression 3 * 4 * Copyright (c) 2019-2020 Red Hat Inc 5 * 6 * Authors: 7 * Juan Quintela <quintela@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "system/ramblock.h" 15 #include "exec/target_page.h" 16 #include "file.h" 17 #include "migration-stats.h" 18 #include "multifd.h" 19 #include "options.h" 20 #include "qapi/error.h" 21 #include "qemu/cutils.h" 22 #include "qemu/error-report.h" 23 #include "trace.h" 24 #include "qemu-file.h" 25 26 static MultiFDSendData *multifd_ram_send; 27 28 void multifd_ram_payload_alloc(MultiFDPages_t *pages) 29 { 30 pages->offset = g_new0(ram_addr_t, multifd_ram_page_count()); 31 } 32 33 void multifd_ram_payload_free(MultiFDPages_t *pages) 34 { 35 g_clear_pointer(&pages->offset, g_free); 36 } 37 38 void multifd_ram_save_setup(void) 39 { 40 multifd_ram_send = multifd_send_data_alloc(); 41 } 42 43 void multifd_ram_save_cleanup(void) 44 { 45 g_clear_pointer(&multifd_ram_send, multifd_send_data_free); 46 } 47 48 static void multifd_set_file_bitmap(MultiFDSendParams *p) 49 { 50 MultiFDPages_t *pages = &p->data->u.ram; 51 52 assert(pages->block); 53 54 for (int i = 0; i < pages->normal_num; i++) { 55 ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true); 56 } 57 58 for (int i = pages->normal_num; i < pages->num; i++) { 59 ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false); 60 } 61 } 62 63 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp) 64 { 65 uint32_t page_count = multifd_ram_page_count(); 66 67 if (migrate_zero_copy_send()) { 68 p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; 69 } 70 71 if (!migrate_mapped_ram()) { 72 /* We need one extra place for the packet header */ 73 p->iov = g_new0(struct iovec, page_count + 1); 74 } else { 75 p->iov = g_new0(struct iovec, page_count); 76 } 77 78 return 0; 79 } 80 81 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) 82 { 83 g_free(p->iov); 84 p->iov = NULL; 85 return; 86 } 87 88 static void multifd_ram_prepare_header(MultiFDSendParams *p) 89 { 90 p->iov[0].iov_len = p->packet_len; 91 p->iov[0].iov_base = p->packet; 92 p->iovs_num++; 93 } 94 95 static void multifd_send_prepare_iovs(MultiFDSendParams *p) 96 { 97 MultiFDPages_t *pages = &p->data->u.ram; 98 uint32_t page_size = multifd_ram_page_size(); 99 100 for (int i = 0; i < pages->normal_num; i++) { 101 p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; 102 p->iov[p->iovs_num].iov_len = page_size; 103 p->iovs_num++; 104 } 105 106 p->next_packet_size = pages->normal_num * page_size; 107 } 108 109 static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp) 110 { 111 bool use_zero_copy_send = migrate_zero_copy_send(); 112 int ret; 113 114 multifd_send_zero_page_detect(p); 115 116 if (migrate_mapped_ram()) { 117 multifd_send_prepare_iovs(p); 118 multifd_set_file_bitmap(p); 119 120 return 0; 121 } 122 123 if (!use_zero_copy_send) { 124 /* 125 * Only !zerocopy needs the header in IOV; zerocopy will 126 * send it separately. 127 */ 128 multifd_ram_prepare_header(p); 129 } 130 131 multifd_send_prepare_iovs(p); 132 p->flags |= MULTIFD_FLAG_NOCOMP; 133 134 multifd_send_fill_packet(p); 135 136 if (use_zero_copy_send) { 137 /* Send header first, without zerocopy */ 138 ret = qio_channel_write_all(p->c, (void *)p->packet, 139 p->packet_len, errp); 140 if (ret != 0) { 141 return -1; 142 } 143 144 stat64_add(&mig_stats.multifd_bytes, p->packet_len); 145 } 146 147 return 0; 148 } 149 150 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) 151 { 152 p->iov = g_new0(struct iovec, multifd_ram_page_count()); 153 return 0; 154 } 155 156 static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p) 157 { 158 g_free(p->iov); 159 p->iov = NULL; 160 } 161 162 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp) 163 { 164 uint32_t flags; 165 166 if (migrate_mapped_ram()) { 167 return multifd_file_recv_data(p, errp); 168 } 169 170 flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; 171 172 if (flags != MULTIFD_FLAG_NOCOMP) { 173 error_setg(errp, "multifd %u: flags received %x flags expected %x", 174 p->id, flags, MULTIFD_FLAG_NOCOMP); 175 return -1; 176 } 177 178 multifd_recv_zero_page_process(p); 179 180 if (!p->normal_num) { 181 return 0; 182 } 183 184 for (int i = 0; i < p->normal_num; i++) { 185 p->iov[i].iov_base = p->host + p->normal[i]; 186 p->iov[i].iov_len = multifd_ram_page_size(); 187 ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); 188 } 189 return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); 190 } 191 192 static void multifd_pages_reset(MultiFDPages_t *pages) 193 { 194 /* 195 * We don't need to touch offset[] array, because it will be 196 * overwritten later when reused. 197 */ 198 pages->num = 0; 199 pages->normal_num = 0; 200 pages->block = NULL; 201 } 202 203 void multifd_ram_fill_packet(MultiFDSendParams *p) 204 { 205 MultiFDPacket_t *packet = p->packet; 206 MultiFDPages_t *pages = &p->data->u.ram; 207 uint32_t zero_num = pages->num - pages->normal_num; 208 209 packet->pages_alloc = cpu_to_be32(multifd_ram_page_count()); 210 packet->normal_pages = cpu_to_be32(pages->normal_num); 211 packet->zero_pages = cpu_to_be32(zero_num); 212 213 if (pages->block) { 214 pstrcpy(packet->ramblock, sizeof(packet->ramblock), 215 pages->block->idstr); 216 } 217 218 for (int i = 0; i < pages->num; i++) { 219 /* there are architectures where ram_addr_t is 32 bit */ 220 uint64_t temp = pages->offset[i]; 221 222 packet->offset[i] = cpu_to_be64(temp); 223 } 224 225 trace_multifd_send_ram_fill(p->id, pages->normal_num, 226 zero_num); 227 } 228 229 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp) 230 { 231 MultiFDPacket_t *packet = p->packet; 232 uint32_t page_count = multifd_ram_page_count(); 233 uint32_t page_size = multifd_ram_page_size(); 234 uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc); 235 int i; 236 237 if (pages_per_packet > page_count) { 238 error_setg(errp, "multifd: received packet with %u pages, expected %u", 239 pages_per_packet, page_count); 240 return -1; 241 } 242 243 p->normal_num = be32_to_cpu(packet->normal_pages); 244 if (p->normal_num > pages_per_packet) { 245 error_setg(errp, "multifd: received packet with %u non-zero pages, " 246 "which exceeds maximum expected pages %u", 247 p->normal_num, pages_per_packet); 248 return -1; 249 } 250 251 p->zero_num = be32_to_cpu(packet->zero_pages); 252 if (p->zero_num > pages_per_packet - p->normal_num) { 253 error_setg(errp, 254 "multifd: received packet with %u zero pages, expected maximum %u", 255 p->zero_num, pages_per_packet - p->normal_num); 256 return -1; 257 } 258 259 if (p->normal_num == 0 && p->zero_num == 0) { 260 return 0; 261 } 262 263 /* make sure that ramblock is 0 terminated */ 264 packet->ramblock[255] = 0; 265 p->block = qemu_ram_block_by_name(packet->ramblock); 266 if (!p->block) { 267 error_setg(errp, "multifd: unknown ram block %s", 268 packet->ramblock); 269 return -1; 270 } 271 272 p->host = p->block->host; 273 for (i = 0; i < p->normal_num; i++) { 274 uint64_t offset = be64_to_cpu(packet->offset[i]); 275 276 if (offset > (p->block->used_length - page_size)) { 277 error_setg(errp, "multifd: offset too long %" PRIu64 278 " (max " RAM_ADDR_FMT ")", 279 offset, p->block->used_length); 280 return -1; 281 } 282 p->normal[i] = offset; 283 } 284 285 for (i = 0; i < p->zero_num; i++) { 286 uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); 287 288 if (offset > (p->block->used_length - page_size)) { 289 error_setg(errp, "multifd: offset too long %" PRIu64 290 " (max " RAM_ADDR_FMT ")", 291 offset, p->block->used_length); 292 return -1; 293 } 294 p->zero[i] = offset; 295 } 296 297 return 0; 298 } 299 300 static inline bool multifd_queue_empty(MultiFDPages_t *pages) 301 { 302 return pages->num == 0; 303 } 304 305 static inline bool multifd_queue_full(MultiFDPages_t *pages) 306 { 307 return pages->num == multifd_ram_page_count(); 308 } 309 310 static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset) 311 { 312 pages->offset[pages->num++] = offset; 313 } 314 315 /* Returns true if enqueue successful, false otherwise */ 316 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) 317 { 318 MultiFDPages_t *pages; 319 320 retry: 321 pages = &multifd_ram_send->u.ram; 322 323 if (multifd_payload_empty(multifd_ram_send)) { 324 multifd_pages_reset(pages); 325 multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM); 326 } 327 328 /* If the queue is empty, we can already enqueue now */ 329 if (multifd_queue_empty(pages)) { 330 pages->block = block; 331 multifd_enqueue(pages, offset); 332 return true; 333 } 334 335 /* 336 * Not empty, meanwhile we need a flush. It can because of either: 337 * 338 * (1) The page is not on the same ramblock of previous ones, or, 339 * (2) The queue is full. 340 * 341 * After flush, always retry. 342 */ 343 if (pages->block != block || multifd_queue_full(pages)) { 344 if (!multifd_send(&multifd_ram_send)) { 345 return false; 346 } 347 goto retry; 348 } 349 350 /* Not empty, and we still have space, do it! */ 351 multifd_enqueue(pages, offset); 352 return true; 353 } 354 355 /* 356 * We have two modes for multifd flushes: 357 * 358 * - Per-section mode: this is the legacy way to flush, it requires one 359 * MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS. 360 * 361 * - Per-round mode: this is the modern way to flush, it requires one 362 * MULTIFD_FLAG_SYNC message only for each round of RAM scan. Normally 363 * it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network 364 * based migrations. 365 * 366 * One thing to mention is mapped-ram always use the modern way to sync. 367 */ 368 369 /* Do we need a per-section multifd flush (legacy way)? */ 370 bool multifd_ram_sync_per_section(void) 371 { 372 if (!migrate_multifd()) { 373 return false; 374 } 375 376 if (migrate_mapped_ram()) { 377 return false; 378 } 379 380 return migrate_multifd_flush_after_each_section(); 381 } 382 383 /* Do we need a per-round multifd flush (modern way)? */ 384 bool multifd_ram_sync_per_round(void) 385 { 386 if (!migrate_multifd()) { 387 return false; 388 } 389 390 if (migrate_mapped_ram()) { 391 return true; 392 } 393 394 return !migrate_multifd_flush_after_each_section(); 395 } 396 397 int multifd_ram_flush_and_sync(QEMUFile *f) 398 { 399 MultiFDSyncReq req; 400 int ret; 401 402 if (!migrate_multifd()) { 403 return 0; 404 } 405 406 if (!multifd_payload_empty(multifd_ram_send)) { 407 if (!multifd_send(&multifd_ram_send)) { 408 error_report("%s: multifd_send fail", __func__); 409 return -1; 410 } 411 } 412 413 /* File migrations only need to sync with threads */ 414 req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL; 415 416 ret = multifd_send_sync_main(req); 417 if (ret) { 418 return ret; 419 } 420 421 /* If we don't need to sync with remote at all, nothing else to do */ 422 if (req == MULTIFD_SYNC_LOCAL) { 423 return 0; 424 } 425 426 /* 427 * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies 428 * on RAM_SAVE_FLAG_EOS instead. 429 */ 430 if (migrate_multifd_flush_after_each_section()) { 431 return 0; 432 } 433 434 qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); 435 qemu_fflush(f); 436 437 return 0; 438 } 439 440 bool multifd_send_prepare_common(MultiFDSendParams *p) 441 { 442 MultiFDPages_t *pages = &p->data->u.ram; 443 multifd_ram_prepare_header(p); 444 multifd_send_zero_page_detect(p); 445 446 if (!pages->normal_num) { 447 p->next_packet_size = 0; 448 return false; 449 } 450 451 return true; 452 } 453 454 static const MultiFDMethods multifd_nocomp_ops = { 455 .send_setup = multifd_nocomp_send_setup, 456 .send_cleanup = multifd_nocomp_send_cleanup, 457 .send_prepare = multifd_nocomp_send_prepare, 458 .recv_setup = multifd_nocomp_recv_setup, 459 .recv_cleanup = multifd_nocomp_recv_cleanup, 460 .recv = multifd_nocomp_recv 461 }; 462 463 static void multifd_nocomp_register(void) 464 { 465 multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops); 466 } 467 468 migration_init(multifd_nocomp_register); 469