/* xref: /qemu/migration/migration.c (revision b103cc6e74ac92f070a0e004bd84334e845c20b5) */
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/ctype.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "file.h"
#include "socket.h"
#include "system/runstate.h"
#include "system/system.h"
#include "system/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/cpr.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "migration-stats.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qobject/qnull.h"
#include "qemu/rcu.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "system/cpus.h"
#include "yank_functions.h"
#include "system/qtest.h"
#include "options.h"
#include "system/dirtylimit.h"
#include "qemu/sockets.h"
#include "system/kvm.h"

#define NOTIFIER_ELEM_INIT(array, elem)    \
    [elem] = NOTIFIER_WITH_RETURN_LIST_INITIALIZER((array)[elem])

#define INMIGRATE_DEFAULT_EXIT_ON_ERROR true

static NotifierWithReturnList migration_state_notifiers[] = {
    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
};

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */
    MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */

    MIG_RP_MSG_MAX
};

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration objects. */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers[MIG_MODE__MAX];

static bool migration_object_check(MigrationState *ms, Error **errp);
static bool migration_switchover_start(MigrationState *s, Error **errp);
static bool close_return_path_on_source(MigrationState *s);
static void migration_completion_end(MigrationState *s);
static void migrate_hup_delete(MigrationState *s);

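/* Mark the start of the downtime window for later downtime accounting. */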
static void migration_downtime_start(MigrationState *s)
{
    trace_vmstate_downtime_checkpoint("src-downtime-start");
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
}

/*
 * This is unfortunate: incoming migration actually needs the outgoing
 * migration state (MigrationState) to be there too, e.g. to query
 * capabilities, parameters, using locks, setup errors, etc.
 *
 * NOTE: when calling this, make sure current_migration exists and has
 * not been freed yet!  Otherwise accessing the refcount is itself
 * already a use-after-free.
 *
 * TODO: Move the shared part of incoming / outgoing out into a separate
 * object.  Then this is not needed.
 */
static void migrate_incoming_ref_outgoing_state(void)
{
    object_ref(migrate_get_current());
}
static void migrate_incoming_unref_outgoing_state(void)
{
    object_unref(migrate_get_current());
}

static void migration_downtime_end(MigrationState *s)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /*
     * If the downtime is already set, postcopy must have set it, in which
     * case it is already the real downtime.
     */
    if (!s->downtime) {
        s->downtime = now - s->downtime_start;
        trace_vmstate_downtime_checkpoint("src-downtime-end");
    }
}

static void precopy_notify_complete(void)
{
    Error *local_err = NULL;

    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
        error_report_err(local_err);
    }

    trace_migration_precopy_complete();
}

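/* True when the configured features need more than one migration channel. */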
static bool migration_needs_multiple_sockets(void)
{
    return migrate_multifd() || migrate_postcopy_preempt();
}

static RunState migration_get_target_runstate(void)
{
    /*
     * When the global state is not migrated, it means we don't know the
     * runstate of the src QEMU.  We have no choice but to assume the VM
     * is running.  NOTE: this is a pretty rare case; so far only Xen
     * uses it.
     */
    if (!global_state_received()) {
        return RUN_STATE_RUNNING;
    }

    return global_state_get_runstate();
}

static bool transport_supports_multi_channels(MigrationAddress *addr)
{
    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
        SocketAddress *saddr = &addr->u.socket;

        return (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
                saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
                saddr->type == SOCKET_ADDRESS_TYPE_VSOCK);
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        return migrate_mapped_ram();
    } else {
        return false;
    }
}

static bool migration_needs_seekable_channel(void)
{
    return migrate_mapped_ram();
}

static bool migration_needs_extra_fds(void)
{
    /*
     * When doing direct-io, multifd requires two different,
     * non-duplicated file descriptors so we can use one of them for
     * unaligned IO.
     */
    return migrate_multifd() && migrate_direct_io();
}

static bool transport_supports_seeking(MigrationAddress *addr)
{
    if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        return true;
    }

    return false;
}

static bool transport_supports_extra_fds(MigrationAddress *addr)
{
    /* file: works because QEMU can open it multiple times */
    return addr->transport == MIGRATION_ADDRESS_TYPE_FILE;
}

static bool
migration_channels_and_transport_compatible(MigrationAddress *addr,
                                            Error **errp)
{
    if (migration_needs_seekable_channel() &&
        !transport_supports_seeking(addr)) {
        error_setg(errp, "Migration requires seekable transport (e.g. file)");
        return false;
    }

    if (migration_needs_multiple_sockets() &&
        !transport_supports_multi_channels(addr)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    if (migration_needs_extra_fds() &&
        !transport_supports_extra_fds(addr)) {
        error_setg(errp,
                   "Migration requires a transport that allows for extra fds (e.g. file)");
        return false;
    }

    if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
        addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        error_setg(errp, "Migration requires streamable transport (eg unix)");
        return false;
    }

    return true;
}

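/* Three-way comparison of page request addresses, for the page_requested tree. */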
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

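/*
 * Stop the VM for switchover: record the downtime start, remember the
 * current runstate and store the global state, then force the runstate
 * to @state.  Returns the result of vm_stop_force_state().
 */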
static int migration_stop_vm(MigrationState *s, RunState state)
{
    int ret;

    migration_downtime_start(s);

    s->vm_old_state = runstate_get();
    global_state_store();

    ret = vm_stop_force_state(state);

    trace_vmstate_downtime_checkpoint("src-vm-stopped");
    trace_migration_completion_vm_stop(ret);

    return ret;
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Initialize the incoming migration object as well, whether or not
     * we'll use it.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);

    qemu_mutex_init(&current_incoming->page_request_mutex);
    qemu_cond_init(&current_incoming->page_request_cond);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    current_incoming->exit_on_error = INMIGRATE_DEFAULT_EXIT_ON_ERROR;

    migration_object_check(current_migration, &error_fatal);

    ram_mig_init();
    dirty_bitmap_mig_init();

    /* Initialize cpu throttle timers */
    cpu_throttle_init();
}

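/*
 * A scheduled bottom half that keeps the MigrationState referenced for
 * as long as the wrapped callback is pending.
 */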
typedef struct {
    QEMUBH *bh;
    QEMUBHFunc *cb;
    void *opaque;
} MigrationBH;

static void migration_bh_dispatch_bh(void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationBH *migbh = opaque;

    /* cleanup this BH */
    qemu_bh_delete(migbh->bh);
    migbh->bh = NULL;

    /* dispatch the other one */
    migbh->cb(migbh->opaque);
    object_unref(OBJECT(s));

    g_free(migbh);
}

void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationBH *migbh = g_new0(MigrationBH, 1);
    QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);

    /* Store these to dispatch when the BH runs */
    migbh->bh = bh;
    migbh->cb = cb;
    migbh->opaque = opaque;

    /*
     * Take a ref on the state for the BH, because it may run when
     * there are no other refs left
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(bh);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread
     * may be waiting on a semaphore, so we should wake up the
     * COLO thread before migration shutdown.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel();
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_recv_cleanup();

    /*
     * RAM state cleanup needs to happen after multifd cleanup, because
     * multifd threads can use some of its states (receivedmap).
     * The VFIO load_cleanup() implementation is BQL-sensitive. It requires
     * that the BQL NOT be held when recycling load threads, so that they
     * won't be blocked from making progress on address space modification
     * operations.
     *
     * To make it work, we could try to not take BQL for all load_cleanup(),
     * or conditionally unlock BQL only if bql_locked() in VFIO.
     *
     * Since most existing call sites take BQL for load_cleanup(), keep
     * it simple by always taking BQL as the rule, so that VFIO can unlock
     * BQL and retake it unconditionally.
     */
    assert(bql_locked());
    qemu_loadvm_state_cleanup(mis);

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    cpr_set_incoming_mode(MIG_MODE_NONE);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

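/* Emit a QMP MIGRATION event for the new state when the 'events' capability is enabled. */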
static void migrate_generate_event(MigrationStatus new_state)
{
    if (migrate_events()) {
        qapi_event_send_migration(new_state);
    }
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
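 *
 * Wire format: be16 message type, be16 payload length, then the payload.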
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    return qemu_fflush(mis->to_src_file);
}

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RB
 * The requested length is one page, as given by qemu_ram_pagesize(rb).
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), len byte, rbname up to 255 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We track the last ramblock that we requested a page from.  Note that
     * we don't need locking because this function is only called from the
     * postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            qatomic_inc(&mis->page_requested_count);
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock because once the page has arrived, it stays there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
#ifndef CONFIG_REPLICATION
    error_report("ENABLE_COLO command came in the migration stream, but "
                 "the replication module is not built in");
    return -ENOTSUP;
#endif

    if (!migrate_colo()) {
        error_report("ENABLE_COLO command came in the migration stream, but "
                     "the x-colo capability is not set");
        return -EINVAL;
    }

    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

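/* A string is treated as a URI when everything before the first ':' is alphabetic. */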
bool migrate_is_uri(const char *uri)
{
    while (*uri && *uri != ':') {
        if (!qemu_isalpha(*uri++)) {
            return false;
        }
    }
    return *uri == ':';
}

bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
                       Error **errp)
{
    g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
    g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
    InetSocketAddress *isock = &addr->u.rdma;
    strList **tail = &addr->u.exec.args;

    if (strstart(uri, "exec:", NULL)) {
        addr->transport = MIGRATION_ADDRESS_TYPE_EXEC;
#ifdef WIN32
        QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path()));
        QAPI_LIST_APPEND(tail, g_strdup("/c"));
#else
        QAPI_LIST_APPEND(tail, g_strdup("/bin/sh"));
        QAPI_LIST_APPEND(tail, g_strdup("-c"));
#endif
        QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:")));
    } else if (strstart(uri, "rdma:", NULL)) {
        if (inet_parse(isock, uri + strlen("rdma:"), errp)) {
            qapi_free_InetSocketAddress(isock);
            return false;
        }
        addr->transport = MIGRATION_ADDRESS_TYPE_RDMA;
    } else if (strstart(uri, "tcp:", NULL) ||
                strstart(uri, "unix:", NULL) ||
                strstart(uri, "vsock:", NULL) ||
                strstart(uri, "fd:", NULL)) {
        addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET;
        SocketAddress *saddr = socket_parse(uri, errp);
        if (!saddr) {
            return false;
        }
        addr->u.socket.type = saddr->type;
        addr->u.socket.u = saddr->u;
        /* Don't free the objects inside; their ownership moved to "addr" */
        g_free(saddr);
    } else if (strstart(uri, "file:", NULL)) {
        addr->transport = MIGRATION_ADDRESS_TYPE_FILE;
        addr->u.file.filename = g_strdup(uri + strlen("file:"));
        if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset,
                              errp)) {
            return false;
        }
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
        return false;
    }

    val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
    val->addr = g_steal_pointer(&addr);
    *channel = g_steal_pointer(&val);
    return true;
}
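/*
 * For example (illustrative values): "tcp:127.0.0.1:4444" produces a
 * 'main' channel with a socket address, while "file:/tmp/vm.mig,offset=4k"
 * produces a file address whose offset suffix is parsed by
 * file_parse_offset().
 */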

static bool
migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
{
    MigrationStatus current = mis->state;

    if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /*
         * Incoming postcopy migration will stay in the PAUSED state even
         * after a reconnection happens.
         */
        return true;
    }

    if (current != MIGRATION_STATUS_NONE) {
        error_setg(errp, "Illegal migration incoming state: %s",
                   MigrationStatus_str(current));
        return false;
    }

    migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP);
    return true;
}

static void qemu_start_incoming_migration(const char *uri, bool has_channels,
                                          MigrationChannelList *channels,
                                          Error **errp)
{
    g_autoptr(MigrationChannel) channel = NULL;
    MigrationAddress *addr = NULL;
    MigrationIncomingState *mis = migration_incoming_get_current();

    /*
     * Preliminary checks on the uri and channels arguments
     */
    if (!uri == !channels) {
        error_setg(errp, "need either 'uri' or 'channels' argument");
        return;
    }

    if (channels) {
        /* Verify that the channel list has only one entry */
        if (channels->next) {
            error_setg(errp, "Channel list must have only one entry, "
                             "for type 'main'");
            return;
        }
        addr = channels->value->addr;
    }

    if (uri) {
        /* caller uses the old URI syntax */
        if (!migrate_uri_parse(uri, &channel, errp)) {
            return;
        }
        addr = channel->addr;
    }

    /* transport mechanism not suitable for migration? */
    if (!migration_channels_and_transport_compatible(addr, errp)) {
        return;
    }

    if (!migration_incoming_state_setup(mis, errp)) {
        return;
    }

    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
        SocketAddress *saddr = &addr->u.socket;
        if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
            saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
            saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
            socket_start_incoming_migration(saddr, errp);
        } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
            fd_start_incoming_migration(saddr->u.fd.str, errp);
        }
#ifdef CONFIG_RDMA
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
        if (migrate_xbzrle()) {
            error_setg(errp, "RDMA and XBZRLE can't be used together");
            return;
        }
        if (migrate_multifd()) {
            error_setg(errp, "RDMA and multifd can't be used together");
            return;
        }
        rdma_start_incoming_migration(&addr->u.rdma, errp);
#endif
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
        exec_start_incoming_migration(addr->u.exec.args, errp);
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        file_start_incoming_migration(&addr->u.file, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }

    /* Close cpr socket to tell source that we are listening */
    cpr_state_close();
}

static void process_incoming_migration_bh(void *opaque)
{
    MigrationIncomingState *mis = opaque;

    trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");

    multifd_recv_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (runstate_is_live(migration_get_target_runstate())) {
        if (autostart) {
            /*
             * Block activation is always delayed until the VM starts,
             * either here (which means we need to start the dest VM right
             * now..), or until qmp_cont() later.
             *
             * We used to have the 'late-block-activate' cap, but now we do
             * this unconditionally, as it does no harm and only benefits.
             * E.g., the time of disk activation is not part of the
             * migration ABI.
             *
             * Make sure all file formats throw away their mutable
             * metadata.  If error, don't restart the VM yet.
             */
            if (migration_block_activate(NULL)) {
                vm_start();
            }
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);

    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);

    mis->loadvm_co = qemu_coroutine_self();
    ret = qemu_loadvm_state(mis->from_src_file);
    mis->loadvm_co = NULL;

    trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            goto out;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    if (ret < 0) {
        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
        goto fail;
    }

    if (migration_incoming_colo_enabled()) {
        /* yield until COLO exit */
        colo_incoming_co();
    }

    migration_bh_schedule(process_incoming_migration_bh, mis);
    goto out;

fail:
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, local_err);
    error_free(local_err);

    migration_incoming_state_destroy();

    if (mis->exit_on_error) {
        WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
            error_report_err(s->error);
            s->error = NULL;
        }

        exit(EXIT_FAILURE);
    }
out:
    /* Pairs with the refcount taken in qmp_migrate_incoming() */
    migrate_incoming_unref_outgoing_state();
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 */
static void migration_incoming_setup(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has a standalone thread to do the vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here we only wake up the main loading thread (the rest of the
         * threads keep waiting), so that we can receive commands from the
         * source now and answer them if needed.  The remaining threads
         * will be woken up afterwards, once we are sure the source is
         * ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f)
{
    migration_incoming_setup(f);
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

/*
 * Returns true when we want to start a new incoming migration process,
 * false otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_multifd()) {
        return migration_has_all_channels();
    }

    /* Preempt channel only starts when the main channel is created */
    if (migrate_postcopy_preempt()) {
        return main_channel;
    }

    /*
     * For all other types of migration, we should only reach here when
     * it's the main channel being created, and we should always proceed
     * with it.
     */
    assert(main_channel);
    return true;
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    bool default_channel = true;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (migrate_multifd() && !migrate_mapped_ram() &&
        !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /*
         * With multiple channels, we may receive them out of order on the
         * destination side, causing an incorrect mapping of source
         * channels on the destination.  Check the channel MAGIC to decide
         * the channel type.  Note this is best effort: the postcopy
         * preempt channel does not send any magic number, so avoid this
         * check for postcopy live migration.  TLS live migration already
         * does a TLS handshake while initializing the main channel, so
         * this issue cannot occur with TLS.
         */
        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                          sizeof(channel_magic), errp);

        if (ret != 0) {
            return;
        }

        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

    if (multifd_recv_setup(errp) != 0) {
        return;
    }

    if (default_channel) {
        f = qemu_file_new_input(ioc);
        migration_incoming_setup(f);
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_multifd()) {
            multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (migration_should_start_incoming(default_channel)) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have connections to all the channels that we need
 * for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

int migrate_send_rp_switchover_ack(MigrationIncomingState *mis)
{
    return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL);
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only user of the
     * to_src_file handle (the fault thread is still paused), so it's OK
     * even without taking the mutex.  The better way, however, is to take
     * the lock before sending the message header, and release it after
     * sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

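/*
 * True while an outgoing migration is in progress in any state, including
 * CANCELLING and COLO.  Safe to call before the migration object exists.
 */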
bool migration_is_running(void)
{
    MigrationState *s = current_migration;

    if (!s) {
        return false;
    }

    switch (s->state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_COLO:
        return true;
    default:
        return false;
    }
}

static bool migration_is_active(void)
{
    MigrationState *s = current_migration;

    return (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

static bool migrate_show_downtime(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;

    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
    }

    if (migrate_show_downtime(s)) {
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = migration_transferred_bytes();
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&mig_stats.zero_pages);
    info->ram->normal = stat64_get(&mig_stats.normal_pages);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count =
        stat64_get(&mig_stats.dirty_sync_count);
    info->ram->dirty_sync_missed_zero_copy =
        stat64_get(&mig_stats.dirty_sync_missed_zero_copy);
    info->ram->postcopy_requests =
        stat64_get(&mig_stats.postcopy_requests);
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = stat64_get(&mig_stats.multifd_bytes);
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = stat64_get(&mig_stats.precopy_bytes);
    info->ram->downtime_bytes = stat64_get(&mig_stats.downtime_bytes);
    info->ram->postcopy_bytes = stat64_get(&mig_stats.postcopy_bytes);

    if (migrate_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate =
           stat64_get(&mig_stats.dirty_pages_rate);
    }

    if (migrate_dirty_limit() && dirtylimit_in_service()) {
        info->has_dirty_limit_throttle_time_per_round = true;
        info->dirty_limit_throttle_time_per_round =
                            dirtylimit_throttle_time_per_round();

        info->has_dirty_limit_ring_full_time = true;
        info->dirty_limit_ring_full_time = dirtylimit_ring_full_time();
    }
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers[migrate_mode()];

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked;
     * a) devices marked in VMState as non-migratable, and
     * b) Explicit migration blockers
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        migration_populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        migration_populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;

    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        info->error_desc = g_strdup(error_get_pretty(s->error));
    }
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    default:
        return;
    }
    info->status = mis->state;

    if (!info->error_desc) {
        MigrationState *s = migrate_get_current();
        QEMU_LOCK_GUARD(&s->error_mutex);

        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
    }
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                         " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                         " started");
        return;
    }
    /*
     * we don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

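/*
 * Atomically move *state from old_state to new_state; the trace point and
 * QMP event are only emitted when the compare-and-swap succeeds.
 */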
void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
                       MigrationStatus new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static void migration_cleanup_json_writer(MigrationState *s)
{
    g_clear_pointer(&s->vmdesc, json_writer_free);
}

static void migration_cleanup(MigrationState *s)
{
    MigrationEventType type;
    QEMUFile *tmp = NULL;

    trace_migration_cleanup();

    migration_cleanup_json_writer(s);

    g_free(s->hostname);
    s->hostname = NULL;

    qemu_savevm_state_cleanup();
    cpr_state_close();
    migrate_hup_delete(s);

    close_return_path_on_source(s);

    if (s->migration_thread_running) {
        bql_unlock();
        qemu_thread_join(&s->thread);
        s->migration_thread_running = false;
        bql_lock();
    }

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        /*
         * Close the file handle without the lock to make sure the critical
         * section won't block for long.
         */
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
    }

    if (tmp) {
        /*
         * We only need to shut down multifd if tmp != NULL, because if
         * tmp == NULL, the main channel was never established, and multifd
         * is only set up after that (in migration_thread()).
         */
        multifd_send_shutdown();
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    assert(!migration_is_active());

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used by 'info migrate'.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    type = migration_has_failed(s) ? MIG_EVENT_PRECOPY_FAILED :
                                     MIG_EVENT_PRECOPY_DONE;
    migration_call_notifiers(s, type, NULL);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migration_cleanup_bh(void *opaque)
{
    migration_cleanup(opaque);
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);

    trace_migrate_error(error_get_pretty(error));

    if (!s->error) {
        s->error = error_copy(error);
    }
}

bool migrate_has_error(MigrationState *s)
{
    /* The lock is not helpful here, but still follow the rule */
    QEMU_LOCK_GUARD(&s->error_mutex);
    return qatomic_read(&s->error);
}

static void migrate_error_free(MigrationState *s)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        error_free(s->error);
        s->error = NULL;
    }
}

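/*
 * Record an error that happened while establishing the connection, and
 * move the state machine to FAILED (or back to POSTCOPY_PAUSED when a
 * postcopy recovery attempt fails).  Only valid before to_dst_file is set.
 */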
static void migration_connect_set_error(MigrationState *s, const Error *error)
{
    MigrationStatus current = s->state;
    MigrationStatus next;

    assert(s->to_dst_file == NULL);

    switch (current) {
    case MIGRATION_STATUS_SETUP:
        next = MIGRATION_STATUS_FAILED;
        break;
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
        /* Never fail a postcopy migration; switch back to PAUSED instead */
        next = MIGRATION_STATUS_POSTCOPY_PAUSED;
        break;
    default:
        /*
         * This really shouldn't happen.  Be careful not to crash the VM
         * just for this; instead, dump something.
         */
        error_report("%s: Illegal migration status (%s) detected",
                     __func__, MigrationStatus_str(current));
        return;
    }

    migrate_set_state(&s->state, current, next);
    migrate_set_error(s, error);
}

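/*
 * Ask the current outgoing migration to cancel: kick the return path and
 * the outgoing channel, then drive the state machine to CANCELLING (and
 * to CANCELLED directly if connecting never started).
 */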
void migration_cancel(void)
{
    MigrationState *s = migrate_get_current();
    int old_state;
    bool setup = (s->state == MIGRATION_STATUS_SETUP);

    trace_migration_cancel();

    if (migrate_dirty_limit()) {
        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
    }

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shut down the rp socket, thus causing the rp thread to shut down */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    do {
        old_state = s->state;
        if (!migration_is_running()) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to time out;
     * if we've got shutdown(2) available then we can force it to quit.
     */
1623     if (s->state == MIGRATION_STATUS_CANCELLING) {
1624         WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
1625             if (s->to_dst_file) {
1626                 qemu_file_shutdown(s->to_dst_file);
1627             }
1628         }
1629     }
1630 
1631     /*
1632      * If qmp_migrate_finish has not been called, then there is no path that
1633      * will complete the cancellation.  Do it now.
1634      */
1635     if (setup && !s->to_dst_file) {
1636         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
1637                           MIGRATION_STATUS_CANCELLED);
1638         cpr_state_close();
1639         migrate_hup_delete(s);
1640     }
1641 }
1642 
1643 void migration_add_notifier_mode(NotifierWithReturn *notify,
1644                                  MigrationNotifyFunc func, MigMode mode)
1645 {
1646     notify->notify = (NotifierWithReturnFunc)func;
1647     notifier_with_return_list_add(&migration_state_notifiers[mode], notify);
1648 }
1649 
1650 void migration_add_notifier(NotifierWithReturn *notify,
1651                             MigrationNotifyFunc func)
1652 {
1653     migration_add_notifier_mode(notify, func, MIG_MODE_NORMAL);
1654 }
1655 
1656 void migration_remove_notifier(NotifierWithReturn *notify)
1657 {
1658     if (notify->notify) {
1659         notifier_with_return_remove(notify);
1660         notify->notify = NULL;
1661     }
1662 }
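
/*
 * A minimal sketch (illustration only, not part of QEMU) of how a
 * subsystem uses the notifier API above.  The my_dev_* names are
 * hypothetical; the callback signature follows MigrationNotifyFunc.
 */
#if 0
static NotifierWithReturn my_dev_notifier;

static int my_dev_migration_notify(NotifierWithReturn *notify,
                                   MigrationEvent *e, Error **errp)
{
    if (e->type == MIG_EVENT_PRECOPY_SETUP && my_dev_is_busy()) {
        /* Only the SETUP event may fail; see migration_call_notifiers() */
        error_setg(errp, "my-dev: busy, refusing to start migration");
        return -EBUSY;
    }
    return 0;
}

/* e.g. from the device's init code; pair with migration_remove_notifier() */
migration_add_notifier(&my_dev_notifier, my_dev_migration_notify);
#endif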
1663 
1664 int migration_call_notifiers(MigrationState *s, MigrationEventType type,
1665                              Error **errp)
1666 {
1667     MigMode mode = s->parameters.mode;
1668     MigrationEvent e;
1669     int ret;
1670 
1671     e.type = type;
1672     ret = notifier_with_return_list_notify(&migration_state_notifiers[mode],
1673                                            &e, errp);
1674     assert(!ret || type == MIG_EVENT_PRECOPY_SETUP);
1675     return ret;
1676 }
1677 
1678 bool migration_has_failed(MigrationState *s)
1679 {
1680     return (s->state == MIGRATION_STATUS_CANCELLED ||
1681             s->state == MIGRATION_STATUS_FAILED);
1682 }
1683 
1684 bool migration_in_postcopy(void)
1685 {
1686     MigrationState *s = migrate_get_current();
1687 
1688     switch (s->state) {
1689     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1690     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1691     case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
1692     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1693         return true;
1694     default:
1695         return false;
1696     }
1697 }
1698 
1699 bool migration_postcopy_is_alive(MigrationStatus state)
1700 {
1701     switch (state) {
1702     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1703     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1704         return true;
1705     default:
1706         return false;
1707     }
1708 }
1709 
1710 bool migration_in_incoming_postcopy(void)
1711 {
1712     PostcopyState ps = postcopy_state_get();
1713 
1714     return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
1715 }
1716 
1717 bool migration_incoming_postcopy_advised(void)
1718 {
1719     PostcopyState ps = postcopy_state_get();
1720 
1721     return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
1722 }
1723 
1724 bool migration_in_bg_snapshot(void)
1725 {
1726     return migrate_background_snapshot() && migration_is_running();
1727 }
1728 
1729 bool migration_thread_is_self(void)
1730 {
1731     MigrationState *s = current_migration;
1732 
1733     return qemu_thread_is_self(&s->thread);
1734 }
1735 
1736 bool migrate_mode_is_cpr(MigrationState *s)
1737 {
1738     MigMode mode = s->parameters.mode;
1739     return mode == MIG_MODE_CPR_REBOOT ||
1740            mode == MIG_MODE_CPR_TRANSFER;
1741 }
1742 
1743 int migrate_init(MigrationState *s, Error **errp)
1744 {
1745     int ret;
1746 
1747     ret = qemu_savevm_state_prepare(errp);
1748     if (ret) {
1749         return ret;
1750     }
1751 
1752     /*
1753      * Reinitialise all migration state, except
1754      * parameters/capabilities that the user set, and
1755      * locks.
1756      */
1757     s->to_dst_file = NULL;
1758     s->state = MIGRATION_STATUS_NONE;
1759     s->rp_state.from_dst_file = NULL;
1760     s->mbps = 0.0;
1761     s->pages_per_second = 0.0;
1762     s->downtime = 0;
1763     s->expected_downtime = 0;
1764     s->setup_time = 0;
1765     s->start_postcopy = false;
1766     s->migration_thread_running = false;
1767     error_free(s->error);
1768     s->error = NULL;
1769 
1770     if (should_send_vmdesc()) {
1771         s->vmdesc = json_writer_new(false);
1772     }
1773 
1774     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1775 
1776     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1777     s->total_time = 0;
1778     s->vm_old_state = -1;
1779     s->iteration_initial_bytes = 0;
1780     s->threshold_size = 0;
1781     s->switchover_acked = false;
1782     s->rdma_migration = false;
1783     /*
1784      * set mig_stats memory to zero for a new migration
1785      */
1786     memset(&mig_stats, 0, sizeof(mig_stats));
1787     migration_reset_vfio_bytes_transferred();
1788 
1789     return 0;
1790 }
1791 
1792 static bool is_busy(Error **reasonp, Error **errp)
1793 {
1794     ERRP_GUARD();
1795 
1796     /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
1797     if (runstate_check(RUN_STATE_SAVE_VM) || migration_is_running()) {
1798         error_propagate_prepend(errp, *reasonp,
1799                                 "disallowing migration blocker "
1800                                 "(migration/snapshot in progress) for: ");
1801         *reasonp = NULL;
1802         return true;
1803     }
1804     return false;
1805 }
1806 
1807 static bool is_only_migratable(Error **reasonp, Error **errp, int modes)
1808 {
1809     ERRP_GUARD();
1810 
1811     if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) {
1812         error_propagate_prepend(errp, *reasonp,
1813                                 "disallowing migration blocker "
1814                                 "(--only-migratable) for: ");
1815         *reasonp = NULL;
1816         return true;
1817     }
1818     return false;
1819 }
1820 
1821 static int get_modes(MigMode mode, va_list ap)
1822 {
1823     int modes = 0;
1824 
1825     while (mode != -1 && mode != MIG_MODE_ALL) {
1826         assert(mode >= MIG_MODE_NORMAL && mode < MIG_MODE__MAX);
1827         modes |= BIT(mode);
1828         mode = va_arg(ap, MigMode);
1829     }
1830     if (mode == MIG_MODE_ALL) {
1831         modes = BIT(MIG_MODE__MAX) - 1;
1832     }
1833     return modes;
1834 }
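
/*
 * For example, a call such as
 *     migrate_add_blocker_modes(&reason, errp,
 *                               MIG_MODE_NORMAL, MIG_MODE_CPR_REBOOT, -1)
 * makes get_modes() return BIT(MIG_MODE_NORMAL) | BIT(MIG_MODE_CPR_REBOOT),
 * while passing MIG_MODE_ALL yields a mask with every mode bit set.
 */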
1835 
1836 static int add_blockers(Error **reasonp, Error **errp, int modes)
1837 {
1838     for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
1839         if (modes & BIT(mode)) {
1840             migration_blockers[mode] = g_slist_prepend(migration_blockers[mode],
1841                                                        *reasonp);
1842         }
1843     }
1844     return 0;
1845 }
1846 
1847 int migrate_add_blocker(Error **reasonp, Error **errp)
1848 {
1849     return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_ALL);
1850 }
1851 
1852 int migrate_add_blocker_normal(Error **reasonp, Error **errp)
1853 {
1854     return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_NORMAL, -1);
1855 }
1856 
1857 int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...)
1858 {
1859     int modes;
1860     va_list ap;
1861 
1862     va_start(ap, mode);
1863     modes = get_modes(mode, ap);
1864     va_end(ap);
1865 
1866     if (is_only_migratable(reasonp, errp, modes)) {
1867         return -EACCES;
1868     } else if (is_busy(reasonp, errp)) {
1869         return -EBUSY;
1870     }
1871     return add_blockers(reasonp, errp, modes);
1872 }
1873 
1874 int migrate_add_blocker_internal(Error **reasonp, Error **errp)
1875 {
1876     int modes = BIT(MIG_MODE__MAX) - 1;
1877 
1878     if (is_busy(reasonp, errp)) {
1879         return -EBUSY;
1880     }
1881     return add_blockers(reasonp, errp, modes);
1882 }
1883 
1884 void migrate_del_blocker(Error **reasonp)
1885 {
1886     if (*reasonp) {
1887         for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
1888             migration_blockers[mode] = g_slist_remove(migration_blockers[mode],
1889                                                       *reasonp);
1890         }
1891         error_free(*reasonp);
1892         *reasonp = NULL;
1893     }
1894 }
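
/*
 * A minimal sketch (illustration only, not part of QEMU) of the typical
 * blocker lifecycle.  On success the Error is owned by the blocker lists
 * until deleted; on failure *reasonp is consumed and a message is
 * returned via errp.  my_blocker is a hypothetical name.
 */
#if 0
static Error *my_blocker;

/* Block migration while a non-migratable feature is in use */
error_setg(&my_blocker, "feature X is in use and is not migratable");
if (migrate_add_blocker(&my_blocker, errp) < 0) {
    return;              /* my_blocker already freed, errp already set */
}

/* ... later, when the feature is torn down ... */
migrate_del_blocker(&my_blocker);
#endif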
1895 
1896 void qmp_migrate_incoming(const char *uri, bool has_channels,
1897                           MigrationChannelList *channels,
1898                           bool has_exit_on_error, bool exit_on_error,
1899                           Error **errp)
1900 {
1901     Error *local_err = NULL;
1902     static bool once = true;
1903     MigrationIncomingState *mis = migration_incoming_get_current();
1904 
1905     if (!once) {
1906         error_setg(errp, "The incoming migration has already been started");
1907         return;
1908     }
1909     if (!runstate_check(RUN_STATE_INMIGRATE)) {
1910         error_setg(errp, "'-incoming' was not specified on the command line");
1911         return;
1912     }
1913 
1914     if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
1915         return;
1916     }
1917 
1918     mis->exit_on_error =
1919         has_exit_on_error ? exit_on_error : INMIGRATE_DEFAULT_EXIT_ON_ERROR;
1920 
1921     qemu_start_incoming_migration(uri, has_channels, channels, &local_err);
1922 
1923     if (local_err) {
1924         yank_unregister_instance(MIGRATION_YANK_INSTANCE);
1925         error_propagate(errp, local_err);
1926         return;
1927     }
1928 
1929     /*
1930      * Make sure MigrationState stays available until the incoming
1931      * migration completes.
1932      *
1933      * NOTE: QEMU _might_ leak this refcount in some failure paths, but
1934      * that's OK.  This is the minimum change needed to at least make
1935      * sure the success case is clean on the refcount.  We could try
1936      * harder to make it accurate for every kind of failure, but that
1937      * would be overkill and wouldn't bring much benefit.
1938      */
1939     migrate_incoming_ref_outgoing_state();
1940     once = false;
1941 }
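
/*
 * A sketch of the QMP usage this command implements, assuming a VM
 * started with "-incoming defer" (address and port are placeholders):
 *
 *   -> { "execute": "migrate-incoming",
 *        "arguments": { "uri": "tcp:0.0.0.0:4444",
 *                       "exit-on-error": false } }
 *   <- { "return": {} }
 */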
1942 
1943 void qmp_migrate_recover(const char *uri, Error **errp)
1944 {
1945     MigrationIncomingState *mis = migration_incoming_get_current();
1946 
1947     /*
1948      * Don't even bother to use ERRP_GUARD(): errp _must_ always be set by
1949      * callers (no one should ignore a recover failure); if one does, it's
1950      * a programming error.
1951      */
1952     assert(errp);
1953 
1954     if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1955         error_setg(errp, "Migrate recover can only be run "
1956                    "when postcopy is paused.");
1957         return;
1958     }
1959 
1960     /* If there's an existing transport, release it */
1961     migration_incoming_transport_cleanup(mis);
1962 
1963     /*
1964      * Note that this call will never start a real migration; it will
1965      * only re-setup the migration stream and poke existing migration
1966      * to continue using that newly established channel.
1967      */
1968     qemu_start_incoming_migration(uri, false, NULL, errp);
1969 }
1970 
1971 void qmp_migrate_pause(Error **errp)
1972 {
1973     MigrationState *ms = migrate_get_current();
1974     MigrationIncomingState *mis = migration_incoming_get_current();
1975     int ret = 0;
1976 
1977     if (migration_postcopy_is_alive(ms->state)) {
1978         /* Source side, during postcopy */
1979         Error *error = NULL;
1980 
1981         /* Tell the core migration that we're pausing */
1982         error_setg(&error, "Postcopy migration is paused by the user");
1983         migrate_set_error(ms, error);
1984         error_free(error);
1985 
1986         qemu_mutex_lock(&ms->qemu_file_lock);
1987         if (ms->to_dst_file) {
1988             ret = qemu_file_shutdown(ms->to_dst_file);
1989         }
1990         qemu_mutex_unlock(&ms->qemu_file_lock);
1991         if (ret) {
1992             error_setg(errp, "Failed to pause source migration");
1993         }
1994 
1995         /*
1996          * Kick the migration thread out of any waiting windows (on behalf
1997          * of the rp thread).
1998          */
1999         migration_rp_kick(ms);
2000 
2001         return;
2002     }
2003 
2004     if (migration_postcopy_is_alive(mis->state)) {
2005         ret = qemu_file_shutdown(mis->from_src_file);
2006         if (ret) {
2007             error_setg(errp, "Failed to pause destination migration");
2008         }
2009         return;
2010     }
2011 
2012     error_setg(errp, "migrate-pause is currently only supported "
2013                "during postcopy-active or postcopy-recover state");
2014 }
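
/*
 * Either side of a live postcopy migration can be paused with a bare
 * QMP command, e.g.:
 *
 *   -> { "execute": "migrate-pause" }
 *   <- { "return": {} }
 */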
2015 
2016 bool migration_is_blocked(Error **errp)
2017 {
2018     GSList *blockers = migration_blockers[migrate_mode()];
2019 
2020     if (qemu_savevm_state_blocked(errp)) {
2021         return true;
2022     }
2023 
2024     if (blockers) {
2025         error_propagate(errp, error_copy(blockers->data));
2026         return true;
2027     }
2028 
2029     return false;
2030 }
2031 
2032 /* Returns true if continue to migrate, or false if error detected */
2033 static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
2034 {
2035     if (resume) {
2036         if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2037             error_setg(errp, "Cannot resume if there is no "
2038                        "paused migration");
2039             return false;
2040         }
2041 
2042         /*
2043          * Postcopy recovery won't work well with the release-ram
2044          * capability, since release-ram drops the page buffer as soon
2045          * as the page is put into the send buffer.  So if a network
2046          * failure happens, any page buffers that have
2047          * not yet reached the destination VM but have already been
2048          * sent from the source VM will be lost forever.  Refuse to let
2049          * the client resume such a postcopy migration.
2050          * Luckily release-ram was designed to only be used when src
2051          * and destination VMs are on the same host, so it should be
2052          * fine.
2053          */
2054         if (migrate_release_ram()) {
2055             error_setg(errp, "Postcopy recovery cannot work "
2056                        "when release-ram capability is set");
2057             return false;
2058         }
2059 
2060         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
2061                           MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
2062 
2063         /* This is a resume, skip init status */
2064         return true;
2065     }
2066 
2067     if (migration_is_running()) {
2068         error_setg(errp, "There's a migration process in progress");
2069         return false;
2070     }
2071 
2072     if (runstate_check(RUN_STATE_INMIGRATE)) {
2073         error_setg(errp, "Guest is waiting for an incoming migration");
2074         return false;
2075     }
2076 
2077     if (runstate_check(RUN_STATE_POSTMIGRATE)) {
2078         error_setg(errp, "Can't migrate the vm that was paused due to "
2079                    "previous migration");
2080         return false;
2081     }
2082 
2083     if (kvm_hwpoisoned_mem()) {
2084         error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
2085                    "please reboot the vm and try again");
2086         return false;
2087     }
2088 
2089     if (migration_is_blocked(errp)) {
2090         return false;
2091     }
2092 
2093     if (migrate_mapped_ram()) {
2094         if (migrate_tls()) {
2095             error_setg(errp, "Cannot use TLS with mapped-ram");
2096             return false;
2097         }
2098 
2099         if (migrate_multifd_compression()) {
2100             error_setg(errp, "Cannot use compression with mapped-ram");
2101             return false;
2102         }
2103     }
2104 
2105     if (migrate_mode_is_cpr(s)) {
2106         const char *conflict = NULL;
2107 
2108         if (migrate_postcopy()) {
2109             conflict = "postcopy";
2110         } else if (migrate_background_snapshot()) {
2111             conflict = "background snapshot";
2112         } else if (migrate_colo()) {
2113             conflict = "COLO";
2114         }
2115 
2116         if (conflict) {
2117             error_setg(errp, "Cannot use %s with CPR", conflict);
2118             return false;
2119         }
2120     }
2121 
2122     if (migrate_init(s, errp)) {
2123         return false;
2124     }
2125 
2126     return true;
2127 }
2128 
2129 static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
2130                                Error **errp);
2131 
2132 static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb,
2133                             void *opaque)
2134 {
2135     s->hup_source = qio_channel_create_watch(ioc, G_IO_HUP);
2136     g_source_set_callback(s->hup_source, cb, opaque, NULL);
2137     g_source_attach(s->hup_source, NULL);
2138 }
2139 
2140 static void migrate_hup_delete(MigrationState *s)
2141 {
2142     if (s->hup_source) {
2143         g_source_destroy(s->hup_source);
2144         g_source_unref(s->hup_source);
2145         s->hup_source = NULL;
2146     }
2147 }
2148 
2149 static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
2150                                       GIOCondition cond,
2151                                       void *opaque)
2152 {
2153     MigrationAddress *addr = opaque;
2154 
2155     qmp_migrate_finish(addr, false, NULL);
2156 
2157     cpr_state_close();
2158     migrate_hup_delete(migrate_get_current());
2159     qapi_free_MigrationAddress(addr);
2160     return G_SOURCE_REMOVE;
2161 }
2162 
2163 void qmp_migrate(const char *uri, bool has_channels,
2164                  MigrationChannelList *channels, bool has_detach, bool detach,
2165                  bool has_resume, bool resume, Error **errp)
2166 {
2167     bool resume_requested;
2168     Error *local_err = NULL;
2169     MigrationState *s = migrate_get_current();
2170     g_autoptr(MigrationChannel) channel = NULL;
2171     MigrationAddress *addr = NULL;
2172     MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL };
2173     MigrationChannel *cpr_channel = NULL;
2174 
2175     /*
2176      * Preliminary checks for the uri and channels arguments
2177      */
2178     if (!uri == !channels) {
2179         error_setg(errp, "need either 'uri' or 'channels' argument");
2180         return;
2181     }
2182 
2183     if (channels) {
2184         for ( ; channels; channels = channels->next) {
2185             MigrationChannelType type = channels->value->channel_type;
2186 
2187             if (channelv[type]) {
2188                 error_setg(errp, "Channel list has more than one %s entry",
2189                            MigrationChannelType_str(type));
2190                 return;
2191             }
2192             channelv[type] = channels->value;
2193         }
2194         cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR];
2195         addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr;
2196         if (!addr) {
2197             error_setg(errp, "Channel list has no main entry");
2198             return;
2199         }
2200     }
2201 
2202     if (uri) {
2203         /* caller uses the old URI syntax */
2204         if (!migrate_uri_parse(uri, &channel, errp)) {
2205             return;
2206         }
2207         addr = channel->addr;
2208     }
2209 
2210     /* transport mechanism not suitable for migration? */
2211     if (!migration_channels_and_transport_compatible(addr, errp)) {
2212         return;
2213     }
2214 
2215     if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
2216         error_setg(errp, "missing 'cpr' migration channel");
2217         return;
2218     }
2219 
2220     resume_requested = has_resume && resume;
2221     if (!migrate_prepare(s, resume_requested, errp)) {
2222         /* Error detected, put into errp */
2223         return;
2224     }
2225 
2226     if (cpr_state_save(cpr_channel, &local_err)) {
2227         goto out;
2228     }
2229 
2230     /*
2231      * For cpr-transfer, the target may not be listening yet on the migration
2232      * channel, because first it must finish cpr_load_state.  The target tells
2233      * us it is listening by closing the cpr-state socket.  Wait for that HUP
2234      * event before connecting in qmp_migrate_finish.
2235      *
2236      * The HUP could occur because the target fails while reading CPR state,
2237      * in which case the target will not listen for the incoming migration
2238      * connection, so qmp_migrate_finish will fail to connect, and then recover.
2239      */
2240     if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
2241         migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb,
2242                         QAPI_CLONE(MigrationAddress, addr));
2243 
2244     } else {
2245         qmp_migrate_finish(addr, resume_requested, errp);
2246     }
2247 
2248 out:
2249     if (local_err) {
2250         migration_connect_set_error(s, local_err);
2251         error_propagate(errp, local_err);
2252     }
2253 }
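
/*
 * A sketch of the two accepted QMP forms (host and port are placeholders):
 *
 * Old URI syntax:
 *   -> { "execute": "migrate", "arguments": { "uri": "tcp:dst-host:4444" } }
 *
 * Channel list syntax, equivalent to the above:
 *   -> { "execute": "migrate",
 *        "arguments": { "channels": [
 *            { "channel-type": "main",
 *              "addr": { "transport": "socket", "type": "inet",
 *                        "host": "dst-host", "port": "4444" } } ] } }
 */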
2254 
2255 static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
2256                                Error **errp)
2257 {
2258     MigrationState *s = migrate_get_current();
2259     Error *local_err = NULL;
2260 
2261     if (!resume_requested) {
2262         if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2263             return;
2264         }
2265     }
2266 
2267     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
2268         SocketAddress *saddr = &addr->u.socket;
2269         if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
2270             saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
2271             saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
2272             socket_start_outgoing_migration(s, saddr, &local_err);
2273         } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
2274             fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err);
2275         }
2276 #ifdef CONFIG_RDMA
2277     } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
2278         rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err);
2279 #endif
2280     } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
2281         exec_start_outgoing_migration(s, addr->u.exec.args, &local_err);
2282     } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
2283         file_start_outgoing_migration(s, &addr->u.file, &local_err);
2284     } else {
2285         error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri",
2286                    "a valid migration protocol");
2287         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2288                           MIGRATION_STATUS_FAILED);
2289     }
2290 
2291     if (local_err) {
2292         if (!resume_requested) {
2293             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2294         }
2295         migration_connect_set_error(s, local_err);
2296         error_propagate(errp, local_err);
2297         return;
2298     }
2299 }
2300 
2301 void qmp_migrate_cancel(Error **errp)
2302 {
2303     /*
2304      * After postcopy migration has started, the source machine is not
2305      * recoverable in case of a migration error. This also means the
2306      * cancel command cannot be used as cancel should allow the
2307      * machine to continue operation.
2308      */
2309     if (migration_in_postcopy()) {
2310         error_setg(errp, "Postcopy migration in progress, cannot cancel.");
2311         return;
2312     }
2313 
2314     migration_cancel();
2315 }
2316 
2317 void qmp_migrate_continue(MigrationStatus state, Error **errp)
2318 {
2319     MigrationState *s = migrate_get_current();
2320     if (s->state != state) {
2321         error_setg(errp, "Migration not in expected state: %s",
2322                    MigrationStatus_str(s->state));
2323         return;
2324     }
2325     qemu_sem_post(&s->pause_sem);
2326 }
2327 
2328 int migration_rp_wait(MigrationState *s)
2329 {
2330     /* If the migration has already failed, skip the wait */
2331     if (migrate_has_error(s)) {
2332         return -1;
2333     }
2334 
2335     qemu_sem_wait(&s->rp_state.rp_sem);
2336 
2337     /* After wait, double check that there's no failure */
2338     if (migrate_has_error(s)) {
2339         return -1;
2340     }
2341 
2342     return 0;
2343 }
2344 
2345 void migration_rp_kick(MigrationState *s)
2346 {
2347     qemu_sem_post(&s->rp_state.rp_sem);
2348 }
2349 
2350 static struct rp_cmd_args {
2351     ssize_t     len; /* -1 = variable */
2352     const char *name;
2353 } rp_cmd_args[] = {
2354     [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
2355     [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
2356     [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
2357     [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
2358     [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
2359     [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
2360     [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
2361     [MIG_RP_MSG_SWITCHOVER_ACK] = { .len =  0, .name = "SWITCHOVER_ACK" },
2362     [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
2363 };
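
/*
 * A minimal sketch (illustration only, not part of QEMU): every return
 * path message is framed as | be16 type | be16 len | payload |, with len
 * checked against the table above.  The destination side would emit a
 * PONG roughly like this, mirroring the framing used by the real sender:
 */
#if 0
static void rp_send_pong_sketch(QEMUFile *rp, uint32_t seq)
{
    qemu_put_be16(rp, MIG_RP_MSG_PONG);
    qemu_put_be16(rp, 4);           /* payload length, see rp_cmd_args[] */
    qemu_put_be32(rp, seq);         /* echoed PING sequence number */
    qemu_fflush(rp);
}
#endif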
2364 
2365 /*
2366  * Process a request for pages received on the return path.
2367  * We're allowed to send more than requested (e.g. to round to our page size)
2368  * and we don't need to send pages that have already been sent.
2369  */
2370 static void
2371 migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
2372                             ram_addr_t start, size_t len, Error **errp)
2373 {
2374     long our_host_ps = qemu_real_host_page_size();
2375 
2376     trace_migrate_handle_rp_req_pages(rbname, start, len);
2377 
2378     /*
2379      * Since we currently insist on matching page sizes, just sanity check
2380      * we're being asked for whole host pages.
2381      */
2382     if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
2383         !QEMU_IS_ALIGNED(len, our_host_ps)) {
2384         error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:"
2385                    RAM_ADDR_FMT " len: %zd", start, len);
2386         return;
2387     }
2388 
2389     ram_save_queue_pages(rbname, start, len, errp);
2390 }
2391 
2392 static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name,
2393                                           Error **errp)
2394 {
2395     RAMBlock *block = qemu_ram_block_by_name(block_name);
2396 
2397     if (!block) {
2398         error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'",
2399                    block_name);
2400         return false;
2401     }
2402 
2403     /* Fetch the received bitmap and refresh the dirty bitmap */
2404     return ram_dirty_bitmap_reload(s, block, errp);
2405 }
2406 
2407 static bool migrate_handle_rp_resume_ack(MigrationState *s,
2408                                          uint32_t value, Error **errp)
2409 {
2410     trace_source_return_path_thread_resume_ack(value);
2411 
2412     if (value != MIGRATION_RESUME_ACK_VALUE) {
2413         error_setg(errp, "illegal resume_ack value %"PRIu32, value);
2414         return false;
2415     }
2416 
2417     /* Now both sides are active. */
2418     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2419                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2420 
2421     /* Notify the send thread that it's time to continue sending pages */
2422     migration_rp_kick(s);
2423 
2424     return true;
2425 }
2426 
2427 /*
2428  * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if
2429  * it exists) in a safe way.
2430  */
2431 static void migration_release_dst_files(MigrationState *ms)
2432 {
2433     QEMUFile *file = NULL;
2434 
2435     WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
2436         /*
2437          * Reset the from_dst_file pointer first before releasing it, as we
2438          * can't block within the lock section.
2439          */
2440         file = ms->rp_state.from_dst_file;
2441         ms->rp_state.from_dst_file = NULL;
2442     }
2443 
2444     /*
2445      * Do the same to the postcopy fast path socket, if it exists.  No
2446      * locking is needed because this qemufile should only be managed by
2447      * the return path thread.
2448      */
2449     if (ms->postcopy_qemufile_src) {
2450         migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
2451         qemu_file_shutdown(ms->postcopy_qemufile_src);
2452         qemu_fclose(ms->postcopy_qemufile_src);
2453         ms->postcopy_qemufile_src = NULL;
2454     }
2455 
2456     qemu_fclose(file);
2457 }
2458 
2459 /*
2460  * Handles messages sent on the return path towards the source VM
2461  */
2463 static void *source_return_path_thread(void *opaque)
2464 {
2465     MigrationState *ms = opaque;
2466     QEMUFile *rp = ms->rp_state.from_dst_file;
2467     uint16_t header_len, header_type;
2468     uint8_t buf[512];
2469     uint32_t tmp32, sibling_error;
2470     ram_addr_t start = 0; /* =0 to silence warning */
2471     size_t  len = 0, expected_len;
2472     Error *err = NULL;
2473     int res;
2474 
2475     trace_source_return_path_thread_entry();
2476     rcu_register_thread();
2477 
2478     while (migration_is_running()) {
2479         trace_source_return_path_thread_loop_top();
2480 
2481         header_type = qemu_get_be16(rp);
2482         header_len = qemu_get_be16(rp);
2483 
2484         if (qemu_file_get_error(rp)) {
2485             qemu_file_get_error_obj(rp, &err);
2486             goto out;
2487         }
2488 
2489         if (header_type >= MIG_RP_MSG_MAX ||
2490             header_type == MIG_RP_MSG_INVALID) {
2491             error_setg(&err, "Received invalid message 0x%04x length 0x%04x",
2492                        header_type, header_len);
2493             goto out;
2494         }
2495 
2496         if ((rp_cmd_args[header_type].len != -1 &&
2497             header_len != rp_cmd_args[header_type].len) ||
2498             header_len > sizeof(buf)) {
2499             error_setg(&err, "Received '%s' message (0x%04x) with "
2500                        "incorrect length %d expecting %zu",
2501                        rp_cmd_args[header_type].name, header_type, header_len,
2502                        (size_t)rp_cmd_args[header_type].len);
2503             goto out;
2504         }
2505 
2506         /* We know we've got a valid header by this point */
2507         res = qemu_get_buffer(rp, buf, header_len);
2508         if (res != header_len) {
2509             error_setg(&err, "Failed reading data for message 0x%04x"
2510                        " read %d expected %d",
2511                        header_type, res, header_len);
2512             goto out;
2513         }
2514 
2515         /* OK, we have the message and the data */
2516         switch (header_type) {
2517         case MIG_RP_MSG_SHUT:
2518             sibling_error = ldl_be_p(buf);
2519             trace_source_return_path_thread_shut(sibling_error);
2520             if (sibling_error) {
2521                 error_setg(&err, "Sibling indicated error %d", sibling_error);
2522             }
2523             /*
2524              * We'll let the main thread deal with closing the RP;
2525              * we could do a shutdown(2) on it, but we're the only user
2526              * anyway, so there's nothing gained.
2527              */
2528             goto out;
2529 
2530         case MIG_RP_MSG_PONG:
2531             tmp32 = ldl_be_p(buf);
2532             trace_source_return_path_thread_pong(tmp32);
2533             qemu_sem_post(&ms->rp_state.rp_pong_acks);
2534             break;
2535 
2536         case MIG_RP_MSG_REQ_PAGES:
2537             start = ldq_be_p(buf);
2538             len = ldl_be_p(buf + 8);
2539             migrate_handle_rp_req_pages(ms, NULL, start, len, &err);
2540             if (err) {
2541                 goto out;
2542             }
2543             break;
2544 
2545         case MIG_RP_MSG_REQ_PAGES_ID:
2546             expected_len = 12 + 1; /* header + termination */
2547 
2548             if (header_len >= expected_len) {
2549                 start = ldq_be_p(buf);
2550                 len = ldl_be_p(buf + 8);
2551                 /* Now we expect an idstr */
2552                 tmp32 = buf[12]; /* Length of the following idstr */
2553                 buf[13 + tmp32] = '\0';
2554                 expected_len += tmp32;
2555             }
2556             if (header_len != expected_len) {
2557                 error_setg(&err, "Req_Page_id with length %d expecting %zd",
2558                            header_len, expected_len);
2559                 goto out;
2560             }
2561             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len,
2562                                         &err);
2563             if (err) {
2564                 goto out;
2565             }
2566             break;
2567 
2568         case MIG_RP_MSG_RECV_BITMAP:
2569             if (header_len < 1) {
2570                 error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name");
2571                 goto out;
2572             }
2573             /* Format: len (1B) + idstr (<255B); NUL-terminate the idstr. */
2574             buf[buf[0] + 1] = '\0';
2575             if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) {
2576                 goto out;
2577             }
2578             break;
2579 
2580         case MIG_RP_MSG_RESUME_ACK:
2581             tmp32 = ldl_be_p(buf);
2582             if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) {
2583                 goto out;
2584             }
2585             break;
2586 
2587         case MIG_RP_MSG_SWITCHOVER_ACK:
2588             ms->switchover_acked = true;
2589             trace_source_return_path_thread_switchover_acked();
2590             break;
2591 
2592         default:
2593             break;
2594         }
2595     }
2596 
2597 out:
2598     if (err) {
2599         migrate_set_error(ms, err);
2600         error_free(err);
2601         trace_source_return_path_thread_bad_end();
2602     }
2603 
2604     if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2605         /*
2606          * This is extremely unlikely: we hit yet another network
2607          * failure while recovering from the first one.  During this
2608          * period the main migration thread can be waiting on rp_sem for
2609          * this thread to sync with the other side.
2610          *
2611          * When this happens, explicitly kick the migration thread out of
2612          * the RECOVER stage and back to PAUSED, so the admin can try
2613          * everything again.
2614          */
2615         migration_rp_kick(ms);
2616     }
2617 
2618     trace_source_return_path_thread_end();
2619     rcu_unregister_thread();
2620 
2621     return NULL;
2622 }
2623 
2624 static int open_return_path_on_source(MigrationState *ms)
2625 {
2626     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
2627     if (!ms->rp_state.from_dst_file) {
2628         return -1;
2629     }
2630 
2631     trace_open_return_path_on_source();
2632 
2633     qemu_thread_create(&ms->rp_state.rp_thread, MIGRATION_THREAD_SRC_RETURN,
2634                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
2635     ms->rp_state.rp_thread_created = true;
2636 
2637     trace_open_return_path_on_source_continue();
2638 
2639     return 0;
2640 }
2641 
2642 /* Return true if error detected, or false otherwise */
2643 static bool close_return_path_on_source(MigrationState *ms)
2644 {
2645     if (!ms->rp_state.rp_thread_created) {
2646         return false;
2647     }
2648 
2649     trace_migration_return_path_end_before();
2650 
2651     /*
2652      * If this is a normal exit then the destination will send a SHUT
2653      * and the rp_thread will exit; however, if there's an error we
2654      * need to cause it to exit. shutdown(2), if we have it, will
2655      * cause it to unblock if it's stuck waiting for the destination.
2656      */
2657     WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
2658         if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
2659             qemu_file_shutdown(ms->rp_state.from_dst_file);
2660         }
2661     }
2662 
2663     qemu_thread_join(&ms->rp_state.rp_thread);
2664     ms->rp_state.rp_thread_created = false;
2665     migration_release_dst_files(ms);
2666     trace_migration_return_path_end_after();
2667 
2668     /* Return path will persist the error in MigrationState when quit */
2669     return migrate_has_error(ms);
2670 }
2671 
2672 static inline void
2673 migration_wait_main_channel(MigrationState *ms)
2674 {
2675     /* Wait until one PONG message received */
2676     qemu_sem_wait(&ms->rp_state.rp_pong_acks);
2677 }
2678 
2679 /*
2680  * Switch from normal iteration to postcopy
2681  * Returns non-0 on error
2682  */
2683 static int postcopy_start(MigrationState *ms, Error **errp)
2684 {
2685     int ret;
2686     QIOChannelBuffer *bioc;
2687     QEMUFile *fb;
2688 
2689     /*
2690      * Now we're 100% sure to switch to postcopy, so the JSON writer won't
2691      * be useful anymore.  Free the resources early if it exists.  Clearing
2692      * the vmdesc also means any follow-up vmstate_save()s will start to
2693      * skip all JSON operations, which can shrink postcopy downtime.
2694      */
2695     migration_cleanup_json_writer(ms);
2696 
2697     if (migrate_postcopy_preempt()) {
2698         migration_wait_main_channel(ms);
2699         if (postcopy_preempt_establish_channel(ms)) {
2700             if (ms->state != MIGRATION_STATUS_CANCELLING) {
2701                 migrate_set_state(&ms->state, ms->state,
2702                                   MIGRATION_STATUS_FAILED);
2703             }
2704             error_setg(errp, "%s: Failed to establish preempt channel",
2705                        __func__);
2706             return -1;
2707         }
2708     }
2709 
2710     trace_postcopy_start();
2711     bql_lock();
2712     trace_postcopy_start_set_run();
2713 
2714     ret = migration_stop_vm(ms, RUN_STATE_FINISH_MIGRATE);
2715     if (ret < 0) {
2716         error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__);
2717         goto fail;
2718     }
2719 
2720     if (!migration_switchover_start(ms, errp)) {
2721         goto fail;
2722     }
2723 
2724     /*
2725      * Cause any non-postcopiable, but iterative devices to
2726      * send out their final data.
2727      */
2728     ret = qemu_savevm_state_complete_precopy_iterable(ms->to_dst_file, true);
2729     if (ret) {
2730         error_setg(errp, "Postcopy save non-postcopiable iterables failed");
2731         goto fail;
2732     }
2733 
2734     /*
2735      * In the finish-migrate state, with the io-lock held, everything
2736      * should be quiet; but we've potentially still got dirty pages, so we
2737      * need to tell the destination to throw away any pages it has already
2738      * received that are dirty.
2739      */
2740     if (migrate_postcopy_ram()) {
2741         ram_postcopy_send_discard_bitmap(ms);
2742     }
2743 
2744     if (migrate_postcopy_ram()) {
2745         /* Ping just for debugging, helps line traces up */
2746         qemu_savevm_send_ping(ms->to_dst_file, 2);
2747     }
2748 
2749     /*
2750      * While loading the device state we may trigger page transfer
2751      * requests and the fd must be free to process those, and thus
2752      * the destination must read the whole device state off the fd before
2753      * it starts processing it.  Unfortunately the ad-hoc migration format
2754      * doesn't allow the destination to know the size to read without fully
2755      * parsing it through each device's load-state code (especially the open
2756      * coded devices that use get/put).
2757      * So we wrap the device state up in a package with a length at the start;
2758      * to do this we use a qemu_buf to hold the whole of the device state.
2759      */
2760     bioc = qio_channel_buffer_new(4096);
2761     qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
2762     fb = qemu_file_new_output(QIO_CHANNEL(bioc));
2763     object_unref(OBJECT(bioc));
2764 
2765     /*
2766      * Make sure the receiver can get incoming pages before we send the rest
2767      * of the state
2768      */
2769     qemu_savevm_send_postcopy_listen(fb);
2770 
2771     ret = qemu_savevm_state_complete_precopy_non_iterable(fb, true);
2772     if (ret) {
2773         error_setg(errp, "Postcopy save non-iterable device states failed");
2774         goto fail_closefb;
2775     }
2776 
2777     if (migrate_postcopy_ram()) {
2778         qemu_savevm_send_ping(fb, 3);
2779     }
2780 
2781     qemu_savevm_send_postcopy_run(fb);
2782 
2783     /* <><> end of stuff going into the package */
2784 
2785     /* Last point of recovery; as soon as we send the package the destination
2786      * can open devices and potentially start running.
2787      * Let's just check again that we've not got any errors.
2788      */
2789     ret = qemu_file_get_error(ms->to_dst_file);
2790     if (ret) {
2791         error_setg(errp, "postcopy_start: Migration stream errored (pre package)");
2792         goto fail_closefb;
2793     }
2794 
2795     /* Now send that blob */
2796     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
2797         error_setg(errp, "%s: Failed to send packaged data", __func__);
2798         goto fail_closefb;
2799     }
2800     qemu_fclose(fb);
2801 
2802     /* Send a notify to give a chance for anything that needs to happen
2803      * at the transition to postcopy and after the device state; in particular
2804      * spice needs to trigger a transition now.
2805      */
2806     migration_call_notifiers(ms, MIG_EVENT_PRECOPY_DONE, NULL);
2807 
2808     migration_downtime_end(ms);
2809 
2810     if (migrate_postcopy_ram()) {
2811         /*
2812          * Although this ping is just for debug, it could potentially be
2813          * used for getting a better measurement of downtime at the source.
2814          */
2815         qemu_savevm_send_ping(ms->to_dst_file, 4);
2816     }
2817 
2818     if (migrate_release_ram()) {
2819         ram_postcopy_migrated_memory_release(ms);
2820     }
2821 
2822     ret = qemu_file_get_error(ms->to_dst_file);
2823     if (ret) {
2824         error_setg_errno(errp, -ret, "postcopy_start: Migration stream error");
2825         goto fail;
2826     }
2827     trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
2828 
2829     /*
2830      * Now postcopy officially started, switch to postcopy bandwidth that
2831      * user specified.
2832      */
2833     migration_rate_set(migrate_max_postcopy_bandwidth());
2834 
2835     /* Now, switchover looks all fine, switching to postcopy-active */
2836     migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE,
2837                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2838 
2839     bql_unlock();
2840 
2841     return ret;
2842 
2843 fail_closefb:
2844     qemu_fclose(fb);
2845 fail:
2846     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
2847                       MIGRATION_STATUS_FAILED);
2848     migration_block_activate(NULL);
2849     migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
2850     bql_unlock();
2851     return -1;
2852 }
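
/*
 * A rough sketch of the package assembled above, shown for orientation
 * only (the exact framing is added by qemu_savevm_send_packaged()):
 *
 *   | MIG_CMD_PACKAGED | be32 length | listen cmd | non-iterable
 *     device state | (ping 3) | postcopy run cmd |
 *
 * The destination reads the whole blob off the wire before loading it,
 * which keeps the main channel free for page requests while devices load.
 */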
2853 
2854 /**
2855  * @migration_switchover_prepare: Start VM switchover procedure
2856  *
2857  * @s: The migration state object pointer
2858  *
2859  * Prepares for the switchover, depending on "pause-before-switchover"
2860  * capability.
2861  *
2862  * If the capability is set, the state machine goes:
2863  *   [postcopy-]active -> pre-switchover -> device
2864  *
2865  * If the capability is not set:
2866  *   [postcopy-]active -> device
2867  *
2868  * Returns: true on success, false if interrupted.
2869  */
2870 static bool migration_switchover_prepare(MigrationState *s)
2871 {
2872     /* Concurrent cancellation?  Quit */
2873     if (s->state == MIGRATION_STATUS_CANCELLING) {
2874         return false;
2875     }
2876 
2877     /*
2878      * No matter precopy or postcopy, since we still hold BQL it must not
2879      * change concurrently to CANCELLING, so it must be either ACTIVE or
2880      * POSTCOPY_ACTIVE.
2881      */
2882     assert(migration_is_active());
2883 
2884     /* If the pre stage not requested, directly switch to DEVICE */
2885     if (!migrate_pause_before_switchover()) {
2886         migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE);
2887         return true;
2888     }
2889 
2890     /* Since leaving this state is not atomic with posting the semaphore
2891      * it's possible that someone could have issued multiple migrate_continue
2892      * and the semaphore is incorrectly positive at this point;
2893      * the docs say it's undefined to reinit a semaphore that's already
2894      * init'd, so use timedwait to eat up any existing posts.
2895      */
2896     while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
2897         /* This block intentionally left blank */
2898     }
2899 
2900     /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
2901     migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
2902     bql_unlock();
2903 
2904     qemu_sem_wait(&s->pause_sem);
2905 
2906     bql_lock();
2907     /*
2908      * After the BQL is released and retaken, the state can be CANCELLING
2909      * if that happened during sem_wait().  Only change the state if it's
2910      * still pre-switchover.
2911      */
2912     migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
2913                       MIGRATION_STATUS_DEVICE);
2914 
2915     return s->state == MIGRATION_STATUS_DEVICE;
2916 }
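
/*
 * A sketch of the management flow when pause-before-switchover is
 * enabled: the MIGRATION event announces the pre-switchover state, and
 * migrate-continue (handled by qmp_migrate_continue() above) posts
 * pause_sem to let the switchover proceed:
 *
 *   <- { "event": "MIGRATION",
 *        "data": { "status": "pre-switchover" } }
 *   -> { "execute": "migrate-continue",
 *        "arguments": { "state": "pre-switchover" } }
 *   <- { "return": {} }
 */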
2917 
2918 static bool migration_switchover_start(MigrationState *s, Error **errp)
2919 {
2920     ERRP_GUARD();
2921 
2922     if (!migration_switchover_prepare(s)) {
2923         error_setg(errp, "Switchover is interrupted");
2924         return false;
2925     }
2926 
2927     /* Inactivate disks except in COLO */
2928     if (!migrate_colo()) {
2929         /*
2930          * Inactivate before sending QEMU_VM_EOF so that the
2931          * bdrv_activate_all() on the other end won't fail.
2932          */
2933         if (!migration_block_inactivate()) {
2934             error_setg(errp, "Block inactivate failed during switchover");
2935             return false;
2936         }
2937     }
2938 
2939     migration_rate_set(RATE_LIMIT_DISABLED);
2940 
2941     precopy_notify_complete();
2942 
2943     qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
2944 
2945     return true;
2946 }
2947 
2948 static int migration_completion_precopy(MigrationState *s)
2949 {
2950     int ret;
2951 
2952     bql_lock();
2953 
2954     if (!migrate_mode_is_cpr(s)) {
2955         ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
2956         if (ret < 0) {
2957             goto out_unlock;
2958         }
2959     }
2960 
2961     if (!migration_switchover_start(s, NULL)) {
2962         ret = -EFAULT;
2963         goto out_unlock;
2964     }
2965 
2966     ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false);
2967 out_unlock:
2968     bql_unlock();
2969     return ret;
2970 }
2971 
2972 static void migration_completion_postcopy(MigrationState *s)
2973 {
2974     trace_migration_completion_postcopy_end();
2975 
2976     bql_lock();
2977     qemu_savevm_state_complete_postcopy(s->to_dst_file);
2978     bql_unlock();
2979 
2980     /*
2981      * Shutdown the postcopy fast path thread.  This is only needed when dest
2982      * QEMU binary is old (7.1/7.2).  QEMU 8.0+ doesn't need this.
2983      */
2984     if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
2985         postcopy_preempt_shutdown_file(s);
2986     }
2987 
2988     trace_migration_completion_postcopy_end_after_complete();
2989 }
2990 
2991 /**
2992  * migration_completion: Used by migration_thread when there's not much left.
2993  *   The caller 'breaks' the loop when this returns.
2994  *
2995  * @s: Current migration state
2996  */
2997 static void migration_completion(MigrationState *s)
2998 {
2999     int ret = 0;
3000     Error *local_err = NULL;
3001 
3002     if (s->state == MIGRATION_STATUS_ACTIVE) {
3003         ret = migration_completion_precopy(s);
3004     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3005         migration_completion_postcopy(s);
3006     } else {
3007         ret = -1;
3008     }
3009 
3010     if (ret < 0) {
3011         goto fail;
3012     }
3013 
3014     if (close_return_path_on_source(s)) {
3015         goto fail;
3016     }
3017 
3018     if (qemu_file_get_error(s->to_dst_file)) {
3019         trace_migration_completion_file_err();
3020         goto fail;
3021     }
3022 
3023     if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
3024         /* COLO does not support postcopy */
3025         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3026                           MIGRATION_STATUS_COLO);
3027     } else {
3028         migration_completion_end(s);
3029     }
3030 
3031     return;
3032 
3033 fail:
3034     if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) {
3035         migrate_set_error(s, local_err);
3036         error_free(local_err);
3037     } else if (ret) {
3038         error_setg_errno(&local_err, -ret, "Error in migration completion");
3039         migrate_set_error(s, local_err);
3040         error_free(local_err);
3041     }
3042 
3043     if (s->state != MIGRATION_STATUS_CANCELLING) {
3044         migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
3045     }
3046 }
3047 
3048 /**
3049  * bg_migration_completion: Used by bg_migration_thread when after all the
3050  *   RAM has been saved. The caller 'breaks' the loop when this returns.
3051  *
3052  * @s: Current migration state
3053  */
3054 static void bg_migration_completion(MigrationState *s)
3055 {
3056     int current_active_state = s->state;
3057 
3058     if (s->state == MIGRATION_STATUS_ACTIVE) {
3059         /*
3060          * By this moment we have RAM content saved into the migration stream.
3061          * The next step is to flush the non-RAM content (device state)
3062          * right after the ram content. The device state has been stored into
3063          * the temporary buffer before RAM saving started.
3064          */
3065         qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
3066         qemu_fflush(s->to_dst_file);
3067     } else if (s->state == MIGRATION_STATUS_CANCELLING) {
3068         return;
3069     }
3070 
3071     if (qemu_file_get_error(s->to_dst_file)) {
3072         trace_migration_completion_file_err();
3073         goto fail;
3074     }
3075 
3076     migration_completion_end(s);
3077     return;
3078 
3079 fail:
3080     migrate_set_state(&s->state, current_active_state,
3081                       MIGRATION_STATUS_FAILED);
3082 }
3083 
3084 typedef enum MigThrError {
3085     /* No error detected */
3086     MIG_THR_ERR_NONE = 0,
3087     /* Detected error, but resumed successfully */
3088     MIG_THR_ERR_RECOVERED = 1,
3089     /* Detected fatal error, need to exit */
3090     MIG_THR_ERR_FATAL = 2,
3091 } MigThrError;
3092 
3093 static int postcopy_resume_handshake(MigrationState *s)
3094 {
3095     qemu_savevm_send_postcopy_resume(s->to_dst_file);
3096 
3097     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3098         if (migration_rp_wait(s)) {
3099             return -1;
3100         }
3101     }
3102 
3103     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3104         return 0;
3105     }
3106 
3107     return -1;
3108 }
3109 
3110 /* Return zero if success, or <0 for error */
3111 static int postcopy_do_resume(MigrationState *s)
3112 {
3113     int ret;
3114 
3115     /*
3116      * Call all the resume_prepare() hooks, so that modules can be
3117      * ready for the migration resume.
3118      */
3119     ret = qemu_savevm_state_resume_prepare(s);
3120     if (ret) {
3121         error_report("%s: resume_prepare() failure detected: %d",
3122                      __func__, ret);
3123         return ret;
3124     }
3125 
3126     /*
3127      * If preempt is enabled, re-establish the preempt channel.  Note that
3128      * we do it after resume prepare to make sure the main channel will be
3129      * created before the preempt channel.  E.g. with weak network, the
3130      * dest QEMU may get messed up with the preempt and main channels on
3131      * the order of connection setup.  This guarantees the correct order.
3132      */
3133     ret = postcopy_preempt_establish_channel(s);
3134     if (ret) {
3135         error_report("%s: postcopy_preempt_establish_channel(): %d",
3136                      __func__, ret);
3137         return ret;
3138     }
3139 
3140     /*
3141      * Last handshake with destination on the resume (destination will
3142      * switch to postcopy-active afterwards)
3143      */
3144     ret = postcopy_resume_handshake(s);
3145     if (ret) {
3146         error_report("%s: handshake failed: %d", __func__, ret);
3147         return ret;
3148     }
3149 
3150     return 0;
3151 }
3152 
3153 /*
3154  * We don't return until we are in a safe state to continue the current
3155  * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
3156  * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
3157  */
3158 static MigThrError postcopy_pause(MigrationState *s)
3159 {
3160     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
3161 
3162     while (true) {
3163         QEMUFile *file;
3164 
3165         /*
3166          * We're already pausing, so ignore any errors on the return
3167          * path and just wait for the thread to finish. It will be
3168          * re-created when we resume.
3169          */
3170         close_return_path_on_source(s);
3171 
3172         /*
3173          * Current channel is possibly broken. Release it.  Note that this is
3174          * guaranteed even without lock because to_dst_file should only be
3175          * modified by the migration thread.  That also guarantees that the
3176          * unregister of yank is safe too without the lock.  It should be safe
3177          * even to be within the qemu_file_lock, but we didn't do that to avoid
3178          * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
3179          * the qemu_file_lock critical section as small as possible.
3180          */
3181         assert(s->to_dst_file);
3182         migration_ioc_unregister_yank_from_file(s->to_dst_file);
3183         qemu_mutex_lock(&s->qemu_file_lock);
3184         file = s->to_dst_file;
3185         s->to_dst_file = NULL;
3186         qemu_mutex_unlock(&s->qemu_file_lock);
3187 
3188         qemu_file_shutdown(file);
3189         qemu_fclose(file);
3190 
3191         migrate_set_state(&s->state, s->state,
3192                           MIGRATION_STATUS_POSTCOPY_PAUSED);
3193 
3194         error_report("Detected IO failure for postcopy. "
3195                      "Migration paused.");
3196 
3197         /*
3198          * We wait until things are fixed up.  Then someone will set the
3199          * status back for us.
3200          */
3201         do {
3202             qemu_sem_wait(&s->postcopy_pause_sem);
3203         } while (postcopy_is_paused(s->state));
3204 
3205         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3206             /* Woken up by a recover procedure. Give it a shot */
3207 
3208             /* Do the resume logic */
3209             if (postcopy_do_resume(s) == 0) {
3210                 /* Let's continue! */
3211                 trace_postcopy_pause_continued();
3212                 return MIG_THR_ERR_RECOVERED;
3213             } else {
3214                 /*
3215                  * Something went wrong during the recovery; let's
3216                  * pause again. Pause is always better than throwing
3217                  * data away.
3218                  */
3219                 continue;
3220             }
3221         } else {
3222             /* This is not right... Time to quit. */
3223             return MIG_THR_ERR_FATAL;
3224         }
3225     }
3226 }
3227 
3228 void migration_file_set_error(int ret, Error *err)
3229 {
3230     MigrationState *s = current_migration;
3231 
3232     WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
3233         if (s->to_dst_file) {
3234             qemu_file_set_error_obj(s->to_dst_file, ret, err);
3235         } else if (err) {
3236             error_report_err(err);
3237         }
3238     }
3239 }
3240 
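/*
 * Check the outgoing channels for errors and translate them into a
 * MigThrError for the migration thread: carry on, enter the postcopy
 * pause loop, or fail fatally.
 */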
3241 static MigThrError migration_detect_error(MigrationState *s)
3242 {
3243     int ret;
3244     int state = s->state;
3245     Error *local_error = NULL;
3246 
3247     if (state == MIGRATION_STATUS_CANCELLING ||
3248         state == MIGRATION_STATUS_CANCELLED) {
3249         /* End the migration, but don't set the state to failed */
3250         return MIG_THR_ERR_FATAL;
3251     }
3252 
3253     /*
3254      * Try to detect any file errors.  Note that postcopy_qemufile_src will
3255      * be NULL when postcopy preempt is not enabled.
3256      */
3257     ret = qemu_file_get_error_obj_any(s->to_dst_file,
3258                                       s->postcopy_qemufile_src,
3259                                       &local_error);
3260     if (!ret) {
3261         /* Everything is fine */
3262         assert(!local_error);
3263         return MIG_THR_ERR_NONE;
3264     }
3265 
3266     if (local_error) {
3267         migrate_set_error(s, local_error);
3268         error_free(local_error);
3269     }
3270 
3271     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
3272         /*
3273          * For postcopy, we allow the network to be down for a
3274          * while. After that, it can be continued by a
3275          * recovery phase.
3276          */
3277         return postcopy_pause(s);
3278     } else {
3279         /*
3280          * For precopy (or postcopy with an error outside of IO), we
3281          * fail immediately.
3282          */
3283         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
3284         trace_migration_thread_file_err();
3285 
3286         /* Time to stop the migration, now. */
3287         return MIG_THR_ERR_FATAL;
3288     }
3289 }
3290 
3291 static void migration_completion_end(MigrationState *s)
3292 {
3293     uint64_t bytes = migration_transferred_bytes();
3294     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3295     int64_t transfer_time;
3296 
3297     /*
3298      * Take the BQL here so that query-migrate on the QMP thread sees:
3299      * - atomic update of s->total_time and s->mbps;
3300      * - correct ordering of s->mbps update vs. s->state;
3301      */
3302     bql_lock();
3303     migration_downtime_end(s);
3304     s->total_time = end_time - s->start_time;
3305     transfer_time = s->total_time - s->setup_time;
3306     if (transfer_time) {
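        /*
         * bytes * 8 is bits and transfer_time is in ms, so bits/ms is
         * kbit/s; dividing by 1000 yields Mbps.  E.g. 1 GiB moved in
         * 10s: 8589934592 bits / 10000 ms / 1000 ~= 859 Mbps.
         */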
3307         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3308     }
3309 
3310     migrate_set_state(&s->state, s->state,
3311                       MIGRATION_STATUS_COMPLETED);
3312     bql_unlock();
3313 }
3314 
3315 static void update_iteration_initial_status(MigrationState *s)
3316 {
3317     /*
3318      * Update these three fields at the same time so that mismatched info
3319      * cannot lead to a wrong speed calculation.
3320      */
3321     s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3322     s->iteration_initial_bytes = migration_transferred_bytes();
3323     s->iteration_initial_pages = ram_get_total_transferred_pages();
3324 }
3325 
3326 static void migration_update_counters(MigrationState *s,
3327                                       int64_t current_time)
3328 {
3329     uint64_t transferred, transferred_pages, time_spent;
3330     uint64_t current_bytes; /* bytes transferred since the beginning */
3331     uint64_t switchover_bw;
3332     /* Expected bandwidth when switching over to destination QEMU */
3333     double expected_bw_per_ms;
3334     double bandwidth;
3335 
3336     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3337         return;
3338     }
3339 
3340     switchover_bw = migrate_avail_switchover_bandwidth();
3341     current_bytes = migration_transferred_bytes();
3342     transferred = current_bytes - s->iteration_initial_bytes;
3343     time_spent = current_time - s->iteration_start_time;
3344     bandwidth = (double)transferred / time_spent;
3345 
3346     if (switchover_bw) {
3347         /*
3348          * If the user specified a switchover bandwidth, trust it; it can
3349          * be more accurate than what we estimated.
3350          */
3351         expected_bw_per_ms = switchover_bw / 1000;
3352     } else {
3353         /* If the user doesn't specify a bandwidth, use the estimate */
3354         expected_bw_per_ms = bandwidth;
3355     }
3356 
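    /*
     * threshold_size is in bytes: (bytes/ms) * downtime_limit (ms).
     * E.g. at roughly 1 GiB/s (~1048576 bytes/ms) with a 300 ms downtime
     * limit, switchover becomes possible once ~300 MiB or less remains.
     */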
3357     s->threshold_size = expected_bw_per_ms * migrate_downtime_limit();
3358 
3359     s->mbps = (((double) transferred * 8.0) /
3360                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3361 
3362     transferred_pages = ram_get_total_transferred_pages() -
3363                             s->iteration_initial_pages;
3364     s->pages_per_second = (double) transferred_pages /
3365                              (((double) time_spent / 1000.0));
3366 
3367     /*
3368      * If we haven't sent anything, we don't want to
3369      * recalculate. 10000 bytes is a small enough number for our purposes.
3370      */
3371     if (stat64_get(&mig_stats.dirty_pages_rate) &&
3372         transferred > 10000) {
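        /*
         * Remaining dirty bytes divided by bytes/ms gives the expected
         * downtime in ms, directly comparable to migrate_downtime_limit().
         */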
3373         s->expected_downtime =
3374             stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms;
3375     }
3376 
3377     migration_rate_reset();
3378 
3379     update_iteration_initial_status(s);
3380 
3381     trace_migrate_transferred(transferred, time_spent,
3382                               /* Both in unit bytes/ms */
3383                               bandwidth, switchover_bw / 1000,
3384                               s->threshold_size);
3385 }
3386 
3387 static bool migration_can_switchover(MigrationState *s)
3388 {
3389     if (!migrate_switchover_ack()) {
3390         return true;
3391     }
3392 
3393     /* No reason to wait for switchover ACK if VM is stopped */
3394     if (!runstate_is_running()) {
3395         return true;
3396     }
3397 
3398     return s->switchover_acked;
3399 }
3400 
3401 /* Migration thread iteration status */
3402 typedef enum {
3403     MIG_ITERATE_RESUME,         /* Resume current iteration */
3404     MIG_ITERATE_SKIP,           /* Skip current iteration */
3405     MIG_ITERATE_BREAK,          /* Break the loop */
3406 } MigIterateState;
3407 
3408 /*
3409  * Run one iteration of the migration loop and return whether to
3410  * resume, skip, or break out of it.
3411  */
3412 static MigIterateState migration_iteration_run(MigrationState *s)
3413 {
3414     uint64_t must_precopy, can_postcopy, pending_size;
3415     Error *local_err = NULL;
3416     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3417     bool can_switchover = migration_can_switchover(s);
3418 
3419     qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
3420     pending_size = must_precopy + can_postcopy;
3421     trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
3422 
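    /*
     * The estimate above is cheap to obtain; only when it suggests we are
     * close to convergence do we pay for the exact numbers, which may
     * involve a more expensive sync of the dirty bitmaps.
     */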
3423     if (pending_size < s->threshold_size) {
3424         qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
3425         pending_size = must_precopy + can_postcopy;
3426         trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
3427     }
3428 
3429     if ((!pending_size || pending_size < s->threshold_size) && can_switchover) {
3430         trace_migration_thread_low_pending(pending_size);
3431         migration_completion(s);
3432         return MIG_ITERATE_BREAK;
3433     }
3434 
3435     /* Still a significant amount to transfer */
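    /*
     * s->start_postcopy is set by the migrate-start-postcopy command; we
     * honour it only once must_precopy has dropped under the switchover
     * threshold and switchover itself is permitted.
     */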
3436     if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover &&
3437         qatomic_read(&s->start_postcopy)) {
3438         if (postcopy_start(s, &local_err)) {
3439             migrate_set_error(s, local_err);
3440             error_report_err(local_err);
3441         }
3442         return MIG_ITERATE_SKIP;
3443     }
3444 
3445     /* Just another iteration step */
3446     qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
3447     return MIG_ITERATE_RESUME;
3448 }
3449 
3450 static void migration_iteration_finish(MigrationState *s)
3451 {
3452     bql_lock();
3453 
3454     /*
3455      * If we enabled cpu throttling for auto-converge, turn it off.
3456      * Stopping CPU throttle should be serialized by BQL to avoid
3457      * racing for the throttle_dirty_sync_timer.
3458      */
3459     if (migrate_auto_converge()) {
3460         cpu_throttle_stop();
3461     }
3462 
3463     switch (s->state) {
3464     case MIGRATION_STATUS_COMPLETED:
3465         runstate_set(RUN_STATE_POSTMIGRATE);
3466         break;
3467     case MIGRATION_STATUS_COLO:
3468         assert(migrate_colo());
3469         migrate_start_colo_process(s);
3470         s->vm_old_state = RUN_STATE_RUNNING;
3471         /* Fallthrough */
3472     case MIGRATION_STATUS_FAILED:
3473     case MIGRATION_STATUS_CANCELLED:
3474     case MIGRATION_STATUS_CANCELLING:
3475         /*
3476          * Re-activate the block drives if they're inactivated.  Note, COLO
3477          * shouldn't use block_active at all, so it should be no-op there.
3478          */
3479         migration_block_activate(NULL);
3480         if (runstate_is_live(s->vm_old_state)) {
3481             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
3482                 vm_start();
3483             }
3484         } else {
3485             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3486                 runstate_set(s->vm_old_state);
3487             }
3488         }
3489         break;
3490 
3491     default:
3492         /* Should not reach here, but if so, forgive the VM. */
3493         error_report("%s: Unknown ending state %d", __func__, s->state);
3494         break;
3495     }
3496 
3497     migration_bh_schedule(migration_cleanup_bh, s);
3498     bql_unlock();
3499 }
3500 
3501 static void bg_migration_iteration_finish(MigrationState *s)
3502 {
3503     /*
3504      * Stop tracking RAM writes - un-protect memory, un-register UFFD
3505      * memory ranges, flush kernel wait queues and wake up threads
3506      * waiting for write fault to be resolved.
3507      */
3508     ram_write_tracking_stop();
3509 
3510     bql_lock();
3511     switch (s->state) {
3512     case MIGRATION_STATUS_COMPLETED:
3513     case MIGRATION_STATUS_ACTIVE:
3514     case MIGRATION_STATUS_FAILED:
3515     case MIGRATION_STATUS_CANCELLED:
3516     case MIGRATION_STATUS_CANCELLING:
3517         break;
3518 
3519     default:
3520         /* Should not reach here, but if so, forgive the VM. */
3521         error_report("%s: Unknown ending state %d", __func__, s->state);
3522         break;
3523     }
3524 
3525     migration_bh_schedule(migration_cleanup_bh, s);
3526     bql_unlock();
3527 }
3528 
3529 /*
3530  * Run one iteration of the background snapshot loop and return whether
3531  * to resume or break out of it.
3532  */
3533 static MigIterateState bg_migration_iteration_run(MigrationState *s)
3534 {
3535     int res;
3536 
3537     res = qemu_savevm_state_iterate(s->to_dst_file, false);
3538     if (res > 0) {
3539         bg_migration_completion(s);
3540         return MIG_ITERATE_BREAK;
3541     }
3542 
3543     return MIG_ITERATE_RESUME;
3544 }
3545 
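/*
 * Wake up migration_rate_limit() early.  Each posted request is expected
 * to be matched by a migration_consume_urgent_request() call from the
 * routine servicing the urgent work.
 */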
3546 void migration_make_urgent_request(void)
3547 {
3548     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3549 }
3550 
3551 void migration_consume_urgent_request(void)
3552 {
3553     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3554 }
3555 
3556 /* Returns true if the rate limiting was broken by an urgent request */
3557 bool migration_rate_limit(void)
3558 {
3559     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3560     MigrationState *s = migrate_get_current();
3561 
3562     bool urgent = false;
3563     migration_update_counters(s, now);
3564     if (migration_rate_exceeded(s->to_dst_file)) {
3565 
3566         if (qemu_file_get_error(s->to_dst_file)) {
3567             return false;
3568         }
3569         /*
3570          * Wait for a delay to do rate limiting OR
3571          * something urgent to post the semaphore.
3572          */
3573         int ms = s->iteration_start_time + BUFFER_DELAY - now;
3574         trace_migration_rate_limit_pre(ms);
3575         if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
3576             /*
3577              * We were woken by one or more urgent things, but
3578              * the timedwait will have consumed one of them.
3579              * The service routine for the urgent wake will decrement
3580              * the semaphore itself for each item it consumes,
3581              * so post back the one we just ate.
3582              */
3583             qemu_sem_post(&s->rate_limit_sem);
3584             urgent = true;
3585         }
3586         trace_migration_rate_limit_post(urgent);
3587     }
3588     return urgent;
3589 }
3590 
3591 /*
3592  * If failover devices are present, wait until they are completely
3593  * unplugged.
3594  */
3595 
3596 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
3597                                     int new_state)
3598 {
3599     if (qemu_savevm_state_guest_unplug_pending()) {
3600         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
3601 
3602         while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
3603                qemu_savevm_state_guest_unplug_pending()) {
3604             qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3605         }
3606         if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
3607             int timeout = 120; /* 30 seconds */
3608             /*
3609              * The migration has been cancelled, but as we have started
3610              * an unplug we must wait for it to finish to be able to plug
3611              * the card back in.
3612              */
3613             while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
3614                 qemu_sem_timedwait(&s->wait_unplug_sem, 250);
3615             }
3616             if (qemu_savevm_state_guest_unplug_pending() &&
3617                 !qtest_enabled()) {
3618                 warn_report("migration: partially unplugged device on "
3619                             "failure");
3620             }
3621         }
3622 
3623         migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
3624     } else {
3625         migrate_set_state(&s->state, old_state, new_state);
3626     }
3627 }
3628 
3629 /*
3630  * Master migration thread on the source VM.
3631  * It drives the migration and pumps the data down the outgoing channel.
3632  */
3633 static void *migration_thread(void *opaque)
3634 {
3635     MigrationState *s = opaque;
3636     MigrationThread *thread = NULL;
3637     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3638     MigThrError thr_error;
3639     bool urgent = false;
3640     Error *local_err = NULL;
3641     int ret;
3642 
3643     thread = migration_threads_add(MIGRATION_THREAD_SRC_MAIN,
3644                                    qemu_get_thread_id());
3645 
3646     rcu_register_thread();
3647 
3648     update_iteration_initial_status(s);
3649 
3650     if (!multifd_send_setup()) {
3651         goto out;
3652     }
3653 
3654     bql_lock();
3655     qemu_savevm_state_header(s->to_dst_file);
3656     bql_unlock();
3657 
3658     /*
3659      * If we opened the return path, we need to make sure dst has it
3660      * opened as well.
3661      */
3662     if (s->rp_state.rp_thread_created) {
3663         /* Now tell the dest that it should open its end so it can reply */
3664         qemu_savevm_send_open_return_path(s->to_dst_file);
3665 
3666         /* And do a ping that will make stuff easier to debug */
3667         qemu_savevm_send_ping(s->to_dst_file, 1);
3668     }
3669 
3670     if (migrate_postcopy()) {
3671         /*
3672          * Tell the destination that we *might* want to do postcopy later;
3673          * if the other end can't do postcopy it should fail now, nice and
3674          * early.
3675          */
3676         qemu_savevm_send_postcopy_advise(s->to_dst_file);
3677     }
3678 
3679     if (migrate_colo()) {
3680         /* Notify migration destination that we enable COLO */
3681         qemu_savevm_send_colo_enable(s->to_dst_file);
3682     }
3683 
3684     if (migrate_auto_converge()) {
3685         /* Start RAMBlock dirty bitmap sync timer */
3686         cpu_throttle_dirty_sync_timer(true);
3687     }
3688 
3689     bql_lock();
3690     ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
3691     bql_unlock();
3692 
3693     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
3694                                MIGRATION_STATUS_ACTIVE);
3695 
3696     /*
3697      * Handle SETUP failures after waiting for virtio-net-failover
3698      * devices to unplug. This is to preserve migration state transitions.
3699      */
3700     if (ret) {
3701         migrate_set_error(s, local_err);
3702         error_free(local_err);
3703         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3704                           MIGRATION_STATUS_FAILED);
3705         goto out;
3706     }
3707 
3708     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3709 
3710     trace_migration_thread_setup_complete();
3711 
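    /*
     * Main migration loop: run one iteration whenever an urgent request
     * arrived or we are under the rate limit, then check for channel
     * failures and sleep off any excess bandwidth.
     */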
3712     while (migration_is_active()) {
3713         if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
3714             MigIterateState iter_state = migration_iteration_run(s);
3715             if (iter_state == MIG_ITERATE_SKIP) {
3716                 continue;
3717             } else if (iter_state == MIG_ITERATE_BREAK) {
3718                 break;
3719             }
3720         }
3721 
3722         /*
3723          * Try to detect any kind of failures, and see whether we
3724          * should stop the migration now.
3725          */
3726         thr_error = migration_detect_error(s);
3727         if (thr_error == MIG_THR_ERR_FATAL) {
3728             /* Stop migration */
3729             break;
3730         } else if (thr_error == MIG_THR_ERR_RECOVERED) {
3731             /*
3732              * Just recovered from, e.g., a network failure; reset all
3733              * the local variables. This is important to avoid
3734              * breaking the transferred_bytes and bandwidth calculations.
3735              */
3736             update_iteration_initial_status(s);
3737         }
3738 
3739         urgent = migration_rate_limit();
3740     }
3741 
3742 out:
3743     trace_migration_thread_after_loop();
3744     migration_iteration_finish(s);
3745     object_unref(OBJECT(s));
3746     rcu_unregister_thread();
3747     migration_threads_remove(thread);
3748     return NULL;
3749 }
3750 
3751 static void bg_migration_vm_start_bh(void *opaque)
3752 {
3753     MigrationState *s = opaque;
3754 
3755     vm_resume(s->vm_old_state);
3756     migration_downtime_end(s);
3757 }
3758 
3759 /**
3760  * Background snapshot thread, based on live migration code.
3761  * This is an alternative implementation of live migration mechanism
3762  * introduced specifically to support background snapshots.
3763  *
3764  * It takes advantage of the userfaultfd write-protection mechanism introduced
3765  * in the v5.7 Linux kernel. Compared to the existing dirty page logging
3766  * migration, much less stream traffic is produced, resulting in smaller
3767  * snapshot images, simply because no duplicate pages can get into the stream.
3768  *
3769  * Another key point is that the generated vmstate stream reflects the machine
3770  * state 'frozen' at the beginning of snapshot creation, whereas the dirty page
3771  * logging mechanism effectively results in the saved snapshot being the state
3772  * of the VM at the end of the process.
3773  */
3774 static void *bg_migration_thread(void *opaque)
3775 {
3776     MigrationState *s = opaque;
3777     int64_t setup_start;
3778     MigThrError thr_error;
3779     QEMUFile *fb;
3780     bool early_fail = true;
3781     Error *local_err = NULL;
3782     int ret;
3783 
3784     rcu_register_thread();
3785 
3786     migration_rate_set(RATE_LIMIT_DISABLED);
3787 
3788     setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3789     /*
3790      * We want to save vmstate for the moment when migration has been
3791      * initiated but also we want to save RAM content while VM is running.
3792      * The RAM content should appear first in the vmstate. So, we first
3793      * stash the non-RAM part of the vmstate to the temporary buffer,
3794      * then write RAM part of the vmstate to the migration stream
3795      * with vCPUs running and, finally, write stashed non-RAM part of
3796      * the vmstate from the buffer to the migration stream.
3797      */
3798     s->bioc = qio_channel_buffer_new(512 * 1024);
3799     qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
3800     fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
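    /*
     * qemu_file_new_output() took its own reference on the channel, so we
     * can drop ours; s->bioc stays valid for reading the stashed vmstate
     * back later.
     */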
3801     object_unref(OBJECT(s->bioc));
3802 
3803     update_iteration_initial_status(s);
3804 
3805     /*
3806      * Prepare for tracking memory writes with UFFD-WP - populate
3807      * RAM pages before protecting.
3808      */
3809 #ifdef __linux__
3810     ram_write_tracking_prepare();
3811 #endif
3812 
3813     bql_lock();
3814     qemu_savevm_state_header(s->to_dst_file);
3815     ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
3816     bql_unlock();
3817 
3818     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
3819                                MIGRATION_STATUS_ACTIVE);
3820 
3821     /*
3822      * Handle SETUP failures after waiting for virtio-net-failover
3823      * devices to unplug. This is to preserve migration state transitions.
3824      */
3825     if (ret) {
3826         migrate_set_error(s, local_err);
3827         error_free(local_err);
3828         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3829                           MIGRATION_STATUS_FAILED);
3830         goto fail_setup;
3831     }
3832 
3833     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3834 
3835     trace_migration_thread_setup_complete();
3836 
3837     bql_lock();
3838 
3839     if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
3840         goto fail;
3841     }
3842 
3843     if (qemu_savevm_state_complete_precopy_non_iterable(fb, false)) {
3844         goto fail;
3845     }
3846     /*
3847      * Since we are going to get non-iterable state data directly
3848      * from s->bioc->data, explicit flush is needed here.
3849      */
3850     qemu_fflush(fb);
3851 
3852     /* Now initialize UFFD context and start tracking RAM writes */
3853     if (ram_write_tracking_start()) {
3854         goto fail;
3855     }
3856     early_fail = false;
3857 
3858     /*
3859      * Start VM from BH handler to avoid write-fault lock here.
3860      * UFFD-WP protection for the whole RAM is already enabled so
3861      * calling VM state change notifiers from vm_start() would initiate
3862      * writes to virtio VQs memory which is in write-protected region.
3863      */
3864     migration_bh_schedule(bg_migration_vm_start_bh, s);
3865     bql_unlock();
3866 
3867     while (migration_is_active()) {
3868         MigIterateState iter_state = bg_migration_iteration_run(s);
3869         if (iter_state == MIG_ITERATE_SKIP) {
3870             continue;
3871         } else if (iter_state == MIG_ITERATE_BREAK) {
3872             break;
3873         }
3874 
3875         /*
3876          * Try to detect any kind of failures, and see whether we
3877          * should stop the migration now.
3878          */
3879         thr_error = migration_detect_error(s);
3880         if (thr_error == MIG_THR_ERR_FATAL) {
3881             /* Stop migration */
3882             break;
3883         }
3884 
3885         migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
3886     }
3887 
3888     trace_migration_thread_after_loop();
3889 
3890 fail:
3891     if (early_fail) {
3892         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
3893                 MIGRATION_STATUS_FAILED);
3894         bql_unlock();
3895     }
3896 
3897 fail_setup:
3898     bg_migration_iteration_finish(s);
3899 
3900     qemu_fclose(fb);
3901     object_unref(OBJECT(s));
3902     rcu_unregister_thread();
3903 
3904     return NULL;
3905 }
3906 
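/*
 * Entry point once an outgoing channel has been established: apply the
 * bandwidth limit, open the return path if needed, and either spawn the
 * migration thread or wake an existing one for postcopy recovery.
 */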
3907 void migration_connect(MigrationState *s, Error *error_in)
3908 {
3909     Error *local_err = NULL;
3910     uint64_t rate_limit;
3911     bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
3912     int ret;
3913 
3914     /*
3915      * If there's a previous error, free it and prepare for another one.
3916      * Meanwhile, if the migration completes successfully, there won't be a
3917      * stale error dumped when calling migration_cleanup().
3918      */
3919     migrate_error_free(s);
3920 
3921     s->expected_downtime = migrate_downtime_limit();
3922     if (error_in) {
3923         migration_connect_set_error(s, error_in);
3924         if (resume) {
3925             /*
3926              * Don't do cleanup for resume if channel is invalid, but only dump
3927              * the error.  We wait for another channel connect from the user.
3928              * The error_report still gives the HMP user a hint on what failed.
3929              * It's normally done in migration_cleanup(), but call it here
3930              * explicitly.
3931              */
3932             error_report_err(error_copy(s->error));
3933         } else {
3934             migration_cleanup(s);
3935         }
3936         return;
3937     }
3938 
3939     if (resume) {
3940         /* This is a resumed migration */
3941         rate_limit = migrate_max_postcopy_bandwidth();
3942     } else {
3943         /* This is a fresh new migration */
3944         rate_limit = migrate_max_bandwidth();
3945 
3946         /* Notify before starting migration thread */
3947         if (migration_call_notifiers(s, MIG_EVENT_PRECOPY_SETUP, &local_err)) {
3948             goto fail;
3949         }
3950     }
3951 
3952     migration_rate_set(rate_limit);
3953     qemu_file_set_blocking(s->to_dst_file, true);
3954 
3955     /*
3956      * Open the return path. For postcopy, it is always used. For
3957      * precopy, QEMU uses the return path only if the user enabled the
3958      * "return-path" capability.
3959      */
3960     if (migrate_postcopy_ram() || migrate_return_path()) {
3961         if (open_return_path_on_source(s)) {
3962             error_setg(&local_err, "Unable to open return-path for postcopy");
3963             goto fail;
3964         }
3965     }
3966 
3967     /*
3968      * This needs to be done before resuming a postcopy.  Note: for newer
3969      * QEMUs we will delay the channel creation until postcopy_start(), to
3970      * avoid out-of-order channel creation.
3971      */
3972     if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
3973         postcopy_preempt_setup(s);
3974     }
3975 
3976     if (resume) {
3977         /* Wakeup the main migration thread to do the recovery */
3978         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
3979                           MIGRATION_STATUS_POSTCOPY_RECOVER);
3980         qemu_sem_post(&s->postcopy_pause_sem);
3981         return;
3982     }
3983 
3984     if (migrate_mode_is_cpr(s)) {
3985         ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
3986         if (ret < 0) {
3987             error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
3988             goto fail;
3989         }
3990     }
3991 
3992     /*
3993      * Take a refcount to make sure the migration object won't get freed by
3994      * the main thread already in migration_shutdown().
3995      *
3996      * The refcount will be released at the end of the thread function.
3997      */
3998     object_ref(OBJECT(s));
3999 
4000     if (migrate_background_snapshot()) {
4001         qemu_thread_create(&s->thread, MIGRATION_THREAD_SNAPSHOT,
4002                 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
4003     } else {
4004         qemu_thread_create(&s->thread, MIGRATION_THREAD_SRC_MAIN,
4005                 migration_thread, s, QEMU_THREAD_JOINABLE);
4006     }
4007     s->migration_thread_running = true;
4008     return;
4009 
4010 fail:
4011     migrate_set_error(s, local_err);
4012     if (s->state != MIGRATION_STATUS_CANCELLING) {
4013         migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
4014     }
4015     error_report_err(local_err);
4016     migration_cleanup(s);
4017 }
4018 
4019 static void migration_class_init(ObjectClass *klass, void *data)
4020 {
4021     DeviceClass *dc = DEVICE_CLASS(klass);
4022 
4023     dc->user_creatable = false;
4024     device_class_set_props_n(dc, migration_properties,
4025                              migration_properties_count);
4026 }
4027 
4028 static void migration_instance_finalize(Object *obj)
4029 {
4030     MigrationState *ms = MIGRATION_OBJ(obj);
4031 
4032     qemu_mutex_destroy(&ms->error_mutex);
4033     qemu_mutex_destroy(&ms->qemu_file_lock);
4034     qemu_sem_destroy(&ms->wait_unplug_sem);
4035     qemu_sem_destroy(&ms->rate_limit_sem);
4036     qemu_sem_destroy(&ms->pause_sem);
4037     qemu_sem_destroy(&ms->postcopy_pause_sem);
4038     qemu_sem_destroy(&ms->rp_state.rp_sem);
4039     qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
4040     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
4041     error_free(ms->error);
4042 }
4043 
4044 static void migration_instance_init(Object *obj)
4045 {
4046     MigrationState *ms = MIGRATION_OBJ(obj);
4047 
4048     ms->state = MIGRATION_STATUS_NONE;
4049     ms->mbps = -1;
4050     ms->pages_per_second = -1;
4051     qemu_sem_init(&ms->pause_sem, 0);
4052     qemu_mutex_init(&ms->error_mutex);
4053 
4054     migrate_params_init(&ms->parameters);
4055 
4056     qemu_sem_init(&ms->postcopy_pause_sem, 0);
4057     qemu_sem_init(&ms->rp_state.rp_sem, 0);
4058     qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
4059     qemu_sem_init(&ms->rate_limit_sem, 0);
4060     qemu_sem_init(&ms->wait_unplug_sem, 0);
4061     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
4062     qemu_mutex_init(&ms->qemu_file_lock);
4063 }
4064 
4065 /*
4066  * Return true if the checks pass, false otherwise. On failure, an error
4067  * is set in errp if provided.
4068  */
4069 static bool migration_object_check(MigrationState *ms, Error **errp)
4070 {
4071     /* Assuming all off */
4072     bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };
4073 
4074     if (!migrate_params_check(&ms->parameters, errp)) {
4075         return false;
4076     }
4077 
4078     return migrate_caps_check(old_caps, ms->capabilities, errp);
4079 }
4080 
4081 static const TypeInfo migration_type = {
4082     .name = TYPE_MIGRATION,
4083     /*
4084      * NOTE: TYPE_MIGRATION is not really a device, as the object is
4085      * not created using qdev_new(), it is not attached to the qdev
4086      * device tree, and it is never realized.
4087      *
4088      * TODO: Make this TYPE_OBJECT once QOM provides something like
4089      * TYPE_DEVICE's "-global" properties.
4090      */
4091     .parent = TYPE_DEVICE,
4092     .class_init = migration_class_init,
4093     .class_size = sizeof(MigrationClass),
4094     .instance_size = sizeof(MigrationState),
4095     .instance_init = migration_instance_init,
4096     .instance_finalize = migration_instance_finalize,
4097 };
4098 
4099 static void register_migration_types(void)
4100 {
4101     type_register_static(&migration_type);
4102 }
4103 
4104 type_init(register_migration_types);
4105