1 /* 2 * Multifd RAM migration without compression 3 * 4 * Copyright (c) 2019-2020 Red Hat Inc 5 * 6 * Authors: 7 * Juan Quintela <quintela@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "system/ramblock.h" 15 #include "exec/target_page.h" 16 #include "file.h" 17 #include "migration-stats.h" 18 #include "multifd.h" 19 #include "options.h" 20 #include "qapi/error.h" 21 #include "qemu/cutils.h" 22 #include "qemu/error-report.h" 23 #include "trace.h" 24 #include "qemu-file.h" 25 26 static MultiFDSendData *multifd_ram_send; 27 28 void multifd_ram_payload_alloc(MultiFDPages_t *pages) 29 { 30 pages->offset = g_new0(ram_addr_t, multifd_ram_page_count()); 31 } 32 33 void multifd_ram_payload_free(MultiFDPages_t *pages) 34 { 35 g_clear_pointer(&pages->offset, g_free); 36 } 37 38 void multifd_ram_save_setup(void) 39 { 40 multifd_ram_send = multifd_send_data_alloc(); 41 } 42 43 void multifd_ram_save_cleanup(void) 44 { 45 g_clear_pointer(&multifd_ram_send, multifd_send_data_free); 46 } 47 48 static void multifd_set_file_bitmap(MultiFDSendParams *p) 49 { 50 MultiFDPages_t *pages = &p->data->u.ram; 51 52 assert(pages->block); 53 54 for (int i = 0; i < pages->normal_num; i++) { 55 ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true); 56 } 57 58 for (int i = pages->normal_num; i < pages->num; i++) { 59 ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false); 60 } 61 } 62 63 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp) 64 { 65 uint32_t page_count = multifd_ram_page_count(); 66 67 if (migrate_zero_copy_send()) { 68 p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; 69 } 70 71 if (!migrate_mapped_ram()) { 72 /* We need one extra place for the packet header */ 73 p->iov = g_new0(struct iovec, page_count + 1); 74 } else { 75 p->iov = g_new0(struct iovec, page_count); 76 } 77 78 return 0; 79 } 80 81 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) 82 { 83 g_free(p->iov); 84 p->iov = NULL; 85 } 86 87 static void multifd_ram_prepare_header(MultiFDSendParams *p) 88 { 89 p->iov[0].iov_len = p->packet_len; 90 p->iov[0].iov_base = p->packet; 91 p->iovs_num++; 92 } 93 94 static void multifd_send_prepare_iovs(MultiFDSendParams *p) 95 { 96 MultiFDPages_t *pages = &p->data->u.ram; 97 uint32_t page_size = multifd_ram_page_size(); 98 99 for (int i = 0; i < pages->normal_num; i++) { 100 p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; 101 p->iov[p->iovs_num].iov_len = page_size; 102 p->iovs_num++; 103 } 104 105 p->next_packet_size = pages->normal_num * page_size; 106 } 107 108 static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp) 109 { 110 bool use_zero_copy_send = migrate_zero_copy_send(); 111 int ret; 112 113 multifd_send_zero_page_detect(p); 114 115 if (migrate_mapped_ram()) { 116 multifd_send_prepare_iovs(p); 117 multifd_set_file_bitmap(p); 118 119 return 0; 120 } 121 122 if (!use_zero_copy_send) { 123 /* 124 * Only !zerocopy needs the header in IOV; zerocopy will 125 * send it separately. 126 */ 127 multifd_ram_prepare_header(p); 128 } 129 130 multifd_send_prepare_iovs(p); 131 p->flags |= MULTIFD_FLAG_NOCOMP; 132 133 multifd_send_fill_packet(p); 134 135 if (use_zero_copy_send) { 136 /* Send header first, without zerocopy */ 137 ret = qio_channel_write_all(p->c, (void *)p->packet, 138 p->packet_len, errp); 139 if (ret != 0) { 140 return -1; 141 } 142 143 stat64_add(&mig_stats.multifd_bytes, p->packet_len); 144 } 145 146 return 0; 147 } 148 149 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) 150 { 151 p->iov = g_new0(struct iovec, multifd_ram_page_count()); 152 return 0; 153 } 154 155 static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p) 156 { 157 g_free(p->iov); 158 p->iov = NULL; 159 } 160 161 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp) 162 { 163 uint32_t flags; 164 165 if (migrate_mapped_ram()) { 166 return multifd_file_recv_data(p, errp); 167 } 168 169 flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; 170 171 if (flags != MULTIFD_FLAG_NOCOMP) { 172 error_setg(errp, "multifd %u: flags received %x flags expected %x", 173 p->id, flags, MULTIFD_FLAG_NOCOMP); 174 return -1; 175 } 176 177 multifd_recv_zero_page_process(p); 178 179 if (!p->normal_num) { 180 return 0; 181 } 182 183 for (int i = 0; i < p->normal_num; i++) { 184 p->iov[i].iov_base = p->host + p->normal[i]; 185 p->iov[i].iov_len = multifd_ram_page_size(); 186 ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); 187 } 188 return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); 189 } 190 191 static void multifd_pages_reset(MultiFDPages_t *pages) 192 { 193 /* 194 * We don't need to touch offset[] array, because it will be 195 * overwritten later when reused. 196 */ 197 pages->num = 0; 198 pages->normal_num = 0; 199 pages->block = NULL; 200 } 201 202 void multifd_ram_fill_packet(MultiFDSendParams *p) 203 { 204 MultiFDPacket_t *packet = p->packet; 205 MultiFDPages_t *pages = &p->data->u.ram; 206 uint32_t zero_num = pages->num - pages->normal_num; 207 208 packet->pages_alloc = cpu_to_be32(multifd_ram_page_count()); 209 packet->normal_pages = cpu_to_be32(pages->normal_num); 210 packet->zero_pages = cpu_to_be32(zero_num); 211 212 if (pages->block) { 213 pstrcpy(packet->ramblock, sizeof(packet->ramblock), 214 pages->block->idstr); 215 } 216 217 for (int i = 0; i < pages->num; i++) { 218 /* there are architectures where ram_addr_t is 32 bit */ 219 uint64_t temp = pages->offset[i]; 220 221 packet->offset[i] = cpu_to_be64(temp); 222 } 223 224 trace_multifd_send_ram_fill(p->id, pages->normal_num, 225 zero_num); 226 } 227 228 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp) 229 { 230 MultiFDPacket_t *packet = p->packet; 231 uint32_t page_count = multifd_ram_page_count(); 232 uint32_t page_size = multifd_ram_page_size(); 233 uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc); 234 int i; 235 236 if (pages_per_packet > page_count) { 237 error_setg(errp, "multifd: received packet with %u pages, expected %u", 238 pages_per_packet, page_count); 239 return -1; 240 } 241 242 p->normal_num = be32_to_cpu(packet->normal_pages); 243 if (p->normal_num > pages_per_packet) { 244 error_setg(errp, "multifd: received packet with %u non-zero pages, " 245 "which exceeds maximum expected pages %u", 246 p->normal_num, pages_per_packet); 247 return -1; 248 } 249 250 p->zero_num = be32_to_cpu(packet->zero_pages); 251 if (p->zero_num > pages_per_packet - p->normal_num) { 252 error_setg(errp, 253 "multifd: received packet with %u zero pages, expected maximum %u", 254 p->zero_num, pages_per_packet - p->normal_num); 255 return -1; 256 } 257 258 if (p->normal_num == 0 && p->zero_num == 0) { 259 return 0; 260 } 261 262 /* make sure that ramblock is 0 terminated */ 263 packet->ramblock[255] = 0; 264 p->block = qemu_ram_block_by_name(packet->ramblock); 265 if (!p->block) { 266 error_setg(errp, "multifd: unknown ram block %s", 267 packet->ramblock); 268 return -1; 269 } 270 271 p->host = p->block->host; 272 for (i = 0; i < p->normal_num; i++) { 273 uint64_t offset = be64_to_cpu(packet->offset[i]); 274 275 if (offset > (p->block->used_length - page_size)) { 276 error_setg(errp, "multifd: offset too long %" PRIu64 277 " (max " RAM_ADDR_FMT ")", 278 offset, p->block->used_length); 279 return -1; 280 } 281 p->normal[i] = offset; 282 } 283 284 for (i = 0; i < p->zero_num; i++) { 285 uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); 286 287 if (offset > (p->block->used_length - page_size)) { 288 error_setg(errp, "multifd: offset too long %" PRIu64 289 " (max " RAM_ADDR_FMT ")", 290 offset, p->block->used_length); 291 return -1; 292 } 293 p->zero[i] = offset; 294 } 295 296 return 0; 297 } 298 299 static inline bool multifd_queue_empty(MultiFDPages_t *pages) 300 { 301 return pages->num == 0; 302 } 303 304 static inline bool multifd_queue_full(MultiFDPages_t *pages) 305 { 306 return pages->num == multifd_ram_page_count(); 307 } 308 309 static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset) 310 { 311 pages->offset[pages->num++] = offset; 312 } 313 314 /* Returns true if enqueue successful, false otherwise */ 315 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) 316 { 317 MultiFDPages_t *pages; 318 319 retry: 320 pages = &multifd_ram_send->u.ram; 321 322 if (multifd_payload_empty(multifd_ram_send)) { 323 multifd_pages_reset(pages); 324 multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM); 325 } 326 327 /* If the queue is empty, we can already enqueue now */ 328 if (multifd_queue_empty(pages)) { 329 pages->block = block; 330 multifd_enqueue(pages, offset); 331 return true; 332 } 333 334 /* 335 * Not empty, meanwhile we need a flush. It can because of either: 336 * 337 * (1) The page is not on the same ramblock of previous ones, or, 338 * (2) The queue is full. 339 * 340 * After flush, always retry. 341 */ 342 if (pages->block != block || multifd_queue_full(pages)) { 343 if (!multifd_send(&multifd_ram_send)) { 344 return false; 345 } 346 goto retry; 347 } 348 349 /* Not empty, and we still have space, do it! */ 350 multifd_enqueue(pages, offset); 351 return true; 352 } 353 354 /* 355 * We have two modes for multifd flushes: 356 * 357 * - Per-section mode: this is the legacy way to flush, it requires one 358 * MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS. 359 * 360 * - Per-round mode: this is the modern way to flush, it requires one 361 * MULTIFD_FLAG_SYNC message only for each round of RAM scan. Normally 362 * it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network 363 * based migrations. 364 * 365 * One thing to mention is mapped-ram always use the modern way to sync. 366 */ 367 368 /* Do we need a per-section multifd flush (legacy way)? */ 369 bool multifd_ram_sync_per_section(void) 370 { 371 if (!migrate_multifd()) { 372 return false; 373 } 374 375 if (migrate_mapped_ram()) { 376 return false; 377 } 378 379 return migrate_multifd_flush_after_each_section(); 380 } 381 382 /* Do we need a per-round multifd flush (modern way)? */ 383 bool multifd_ram_sync_per_round(void) 384 { 385 if (!migrate_multifd()) { 386 return false; 387 } 388 389 if (migrate_mapped_ram()) { 390 return true; 391 } 392 393 return !migrate_multifd_flush_after_each_section(); 394 } 395 396 int multifd_ram_flush_and_sync(QEMUFile *f) 397 { 398 MultiFDSyncReq req; 399 int ret; 400 401 if (!migrate_multifd()) { 402 return 0; 403 } 404 405 if (!multifd_payload_empty(multifd_ram_send)) { 406 if (!multifd_send(&multifd_ram_send)) { 407 error_report("%s: multifd_send fail", __func__); 408 return -1; 409 } 410 } 411 412 /* File migrations only need to sync with threads */ 413 req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL; 414 415 ret = multifd_send_sync_main(req); 416 if (ret) { 417 return ret; 418 } 419 420 /* If we don't need to sync with remote at all, nothing else to do */ 421 if (req == MULTIFD_SYNC_LOCAL) { 422 return 0; 423 } 424 425 /* 426 * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies 427 * on RAM_SAVE_FLAG_EOS instead. 428 */ 429 if (migrate_multifd_flush_after_each_section()) { 430 return 0; 431 } 432 433 qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); 434 qemu_fflush(f); 435 436 return 0; 437 } 438 439 bool multifd_send_prepare_common(MultiFDSendParams *p) 440 { 441 MultiFDPages_t *pages = &p->data->u.ram; 442 multifd_ram_prepare_header(p); 443 multifd_send_zero_page_detect(p); 444 445 if (!pages->normal_num) { 446 p->next_packet_size = 0; 447 return false; 448 } 449 450 return true; 451 } 452 453 static const MultiFDMethods multifd_nocomp_ops = { 454 .send_setup = multifd_nocomp_send_setup, 455 .send_cleanup = multifd_nocomp_send_cleanup, 456 .send_prepare = multifd_nocomp_send_prepare, 457 .recv_setup = multifd_nocomp_recv_setup, 458 .recv_cleanup = multifd_nocomp_recv_cleanup, 459 .recv = multifd_nocomp_recv 460 }; 461 462 static void multifd_nocomp_register(void) 463 { 464 multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops); 465 } 466 467 migration_init(multifd_nocomp_register); 468