xref: /cloud-hypervisor/vmm/src/lib.rs (revision 50bac1694f5bb305f00f02dc257896536eac5e0f)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use std::collections::HashMap;
12 use std::fs::File;
13 use std::io::{stdout, Read, Write};
14 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
15 use std::os::unix::net::{UnixListener, UnixStream};
16 use std::panic::AssertUnwindSafe;
17 use std::path::PathBuf;
18 use std::rc::Rc;
19 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
20 use std::sync::{Arc, Mutex};
21 use std::time::Instant;
22 use std::{io, result, thread};
23 
24 use anyhow::anyhow;
25 #[cfg(feature = "dbus_api")]
26 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
27 use api::http::HttpApiHandle;
28 use console_devices::{pre_create_console_devices, ConsoleInfo};
29 use landlock::LandlockError;
30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
31 use memory_manager::MemoryManagerSnapshotData;
32 use pci::PciBdf;
33 use seccompiler::{apply_filter, SeccompAction};
34 use serde::ser::{SerializeStruct, Serializer};
35 use serde::{Deserialize, Serialize};
36 use signal_hook::iterator::{Handle, Signals};
37 use thiserror::Error;
38 use tracer::trace_scoped;
39 use vm_memory::bitmap::AtomicBitmap;
40 use vm_memory::{ReadVolatile, WriteVolatile};
41 use vm_migration::protocol::*;
42 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
43 use vmm_sys_util::eventfd::EventFd;
44 use vmm_sys_util::signal::unblock_signal;
45 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
46 
47 use crate::api::{
48     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
49     VmSendMigrationData, VmmPingResponse,
50 };
51 use crate::config::{add_to_config, RestoreConfig};
52 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
53 use crate::coredump::GuestDebuggable;
54 use crate::landlock::Landlock;
55 use crate::memory_manager::MemoryManager;
56 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
57 use crate::migration::get_vm_snapshot;
58 use crate::migration::{recv_vm_config, recv_vm_state};
59 use crate::seccomp_filters::{get_seccomp_filter, Thread};
60 use crate::vm::{Error as VmError, Vm, VmState};
61 use crate::vm_config::{
62     DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
63     VmConfig, VsockConfig,
64 };
65 
66 mod acpi;
67 pub mod api;
68 mod clone3;
69 pub mod config;
70 pub mod console_devices;
71 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
72 mod coredump;
73 pub mod cpu;
74 pub mod device_manager;
75 pub mod device_tree;
76 #[cfg(feature = "guest_debug")]
77 mod gdb;
78 #[cfg(feature = "igvm")]
79 mod igvm;
80 pub mod interrupt;
81 pub mod landlock;
82 pub mod memory_manager;
83 pub mod migration;
84 mod pci_segment;
85 pub mod seccomp_filters;
86 mod serial_manager;
87 mod sigwinch_listener;
88 pub mod vm;
89 pub mod vm_config;
90 
// Guest memory mmap aliases specialized with an `AtomicBitmap` dirty-page
// bitmap, as needed for dirty-page tracking during live migration.
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
93 
94 /// Errors associated with VMM management
95 #[derive(Debug, Error)]
96 pub enum Error {
97     /// API request receive error
98     #[error("Error receiving API request: {0}")]
99     ApiRequestRecv(#[source] RecvError),
100 
101     /// API response send error
102     #[error("Error sending API request: {0}")]
103     ApiResponseSend(#[source] SendError<ApiResponse>),
104 
105     /// Cannot bind to the UNIX domain socket path
106     #[error("Error binding to UNIX domain socket: {0}")]
107     Bind(#[source] io::Error),
108 
109     /// Cannot clone EventFd.
110     #[error("Error cloning EventFd: {0}")]
111     EventFdClone(#[source] io::Error),
112 
113     /// Cannot create EventFd.
114     #[error("Error creating EventFd: {0}")]
115     EventFdCreate(#[source] io::Error),
116 
117     /// Cannot read from EventFd.
118     #[error("Error reading from EventFd: {0}")]
119     EventFdRead(#[source] io::Error),
120 
121     /// Cannot create epoll context.
122     #[error("Error creating epoll context: {0}")]
123     Epoll(#[source] io::Error),
124 
125     /// Cannot create HTTP thread
126     #[error("Error spawning HTTP thread: {0}")]
127     HttpThreadSpawn(#[source] io::Error),
128 
129     /// Cannot create D-Bus thread
130     #[cfg(feature = "dbus_api")]
131     #[error("Error spawning D-Bus thread: {0}")]
132     DBusThreadSpawn(#[source] io::Error),
133 
134     /// Cannot start D-Bus session
135     #[cfg(feature = "dbus_api")]
136     #[error("Error starting D-Bus session: {0}")]
137     CreateDBusSession(#[source] zbus::Error),
138 
139     /// Cannot create `event-monitor` thread
140     #[error("Error spawning `event-monitor` thread: {0}")]
141     EventMonitorThreadSpawn(#[source] io::Error),
142 
143     /// Cannot handle the VM STDIN stream
144     #[error("Error handling VM stdin: {0:?}")]
145     Stdin(VmError),
146 
147     /// Cannot handle the VM pty stream
148     #[error("Error handling VM pty: {0:?}")]
149     Pty(VmError),
150 
151     /// Cannot reboot the VM
152     #[error("Error rebooting VM: {0:?}")]
153     VmReboot(VmError),
154 
155     /// Cannot create VMM thread
156     #[error("Error spawning VMM thread {0:?}")]
157     VmmThreadSpawn(#[source] io::Error),
158 
159     /// Cannot shut the VMM down
160     #[error("Error shutting down VMM: {0:?}")]
161     VmmShutdown(VmError),
162 
163     /// Cannot create seccomp filter
164     #[error("Error creating seccomp filter: {0}")]
165     CreateSeccompFilter(seccompiler::Error),
166 
167     /// Cannot apply seccomp filter
168     #[error("Error applying seccomp filter: {0}")]
169     ApplySeccompFilter(seccompiler::Error),
170 
171     /// Error activating virtio devices
172     #[error("Error activating virtio devices: {0:?}")]
173     ActivateVirtioDevices(VmError),
174 
175     /// Error creating API server
176     #[error("Error creating API server {0:?}")]
177     CreateApiServer(micro_http::ServerError),
178 
179     /// Error binding API server socket
180     #[error("Error creation API server's socket {0:?}")]
181     CreateApiServerSocket(#[source] io::Error),
182 
183     #[cfg(feature = "guest_debug")]
184     #[error("Failed to start the GDB thread: {0}")]
185     GdbThreadSpawn(io::Error),
186 
187     /// GDB request receive error
188     #[cfg(feature = "guest_debug")]
189     #[error("Error receiving GDB request: {0}")]
190     GdbRequestRecv(#[source] RecvError),
191 
192     /// GDB response send error
193     #[cfg(feature = "guest_debug")]
194     #[error("Error sending GDB request: {0}")]
195     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
196 
197     #[error("Cannot spawn a signal handler thread: {0}")]
198     SignalHandlerSpawn(#[source] io::Error),
199 
200     #[error("Failed to join on threads: {0:?}")]
201     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
202 
203     /// Cannot create Landlock object
204     #[error("Error creating landlock object: {0}")]
205     CreateLandlock(LandlockError),
206 
207     /// Cannot apply landlock based sandboxing
208     #[error("Error applying landlock: {0}")]
209     ApplyLandlock(LandlockError),
210 }
211 pub type Result<T> = result::Result<T, Error>;
212 
/// Tokens distinguishing the event sources registered with the VMM's epoll
/// context; the token is stored as the epoll event's `u64` user data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    /// Catch-all for user-data values that do not map to a known source.
    Unknown,
}

impl From<u64> for EpollDispatch {
    /// Decodes the epoll user-data value back into a dispatch token,
    /// mapping any unrecognized value to `Unknown`.
    fn from(v: u64) -> Self {
        match v {
            0 => Self::Exit,
            1 => Self::Reset,
            2 => Self::Api,
            3 => Self::ActivateVirtioDevices,
            4 => Self::Debug,
            _ => Self::Unknown,
        }
    }
}
237 
/// Thin owner of the epoll file descriptor driving the VMM control loop.
pub struct EpollContext {
    // Wrapped in `File` so the fd is closed automatically on drop.
    epoll_file: File,
}
241 
242 impl EpollContext {
243     pub fn new() -> result::Result<EpollContext, io::Error> {
244         let epoll_fd = epoll::create(true)?;
245         // Use 'File' to enforce closing on 'epoll_fd'
246         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
247         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
248 
249         Ok(EpollContext { epoll_file })
250     }
251 
252     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
253     where
254         T: AsRawFd,
255     {
256         let dispatch_index = token as u64;
257         epoll::ctl(
258             self.epoll_file.as_raw_fd(),
259             epoll::ControlOptions::EPOLL_CTL_ADD,
260             fd.as_raw_fd(),
261             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
262         )?;
263 
264         Ok(())
265     }
266 
267     #[cfg(fuzzing)]
268     pub fn add_event_custom<T>(
269         &mut self,
270         fd: &T,
271         id: u64,
272         evts: epoll::Events,
273     ) -> result::Result<(), io::Error>
274     where
275         T: AsRawFd,
276     {
277         epoll::ctl(
278             self.epoll_file.as_raw_fd(),
279             epoll::ControlOptions::EPOLL_CTL_ADD,
280             fd.as_raw_fd(),
281             epoll::Event::new(evts, id),
282         )?;
283 
284         Ok(())
285     }
286 }
287 
impl AsRawFd for EpollContext {
    // Expose the underlying epoll fd so callers can wait on it directly.
    fn as_raw_fd(&self) -> RawFd {
        self.epoll_file.as_raw_fd()
    }
}
293 
/// Identification of a PCI device: its user-visible id and its
/// bus/device/function address.
pub struct PciDeviceInfo {
    pub id: String,
    pub bdf: PciBdf,
}
298 
299 impl Serialize for PciDeviceInfo {
300     fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
301     where
302         S: Serializer,
303     {
304         let bdf_str = self.bdf.to_string();
305 
306         // Serialize the structure.
307         let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
308         state.serialize_field("id", &self.id)?;
309         state.serialize_field("bdf", &bdf_str)?;
310         state.end()
311     }
312 }
313 
/// Returns the names of the Cargo features this binary was compiled with.
pub fn feature_list() -> Vec<String> {
    let mut features = Vec::new();
    // Each feature contributes its name only when compiled in; the order
    // matches the (alphabetical) feature declaration order.
    #[cfg(feature = "dbus_api")]
    features.push("dbus_api".to_string());
    #[cfg(feature = "dhat-heap")]
    features.push("dhat-heap".to_string());
    #[cfg(feature = "guest_debug")]
    features.push("guest_debug".to_string());
    #[cfg(feature = "igvm")]
    features.push("igvm".to_string());
    #[cfg(feature = "io_uring")]
    features.push("io_uring".to_string());
    #[cfg(feature = "kvm")]
    features.push("kvm".to_string());
    #[cfg(feature = "mshv")]
    features.push("mshv".to_string());
    #[cfg(feature = "sev_snp")]
    features.push("sev_snp".to_string());
    #[cfg(feature = "tdx")]
    features.push("tdx".to_string());
    #[cfg(feature = "tracing")]
    features.push("tracing".to_string());
    features
}
338 
/// Spawns the `event-monitor` thread which drains events from `monitor.rx`
/// and fans them out to the optional event file and all broadcast receivers.
///
/// The thread first applies its per-thread seccomp filter (when non-empty)
/// and, if `landlock_enable` is set, a Landlock sandbox. Any failure in
/// these sandboxing steps — and any panic in the event loop — is reported by
/// writing `exit_event` so the rest of the VMM can shut down.
///
/// Returns the join handle of the spawned thread, or an error if the seccomp
/// filter cannot be built or the thread cannot be spawned.
pub fn start_event_monitor_thread(
    mut monitor: event_monitor::Monitor,
    seccomp_action: &SeccompAction,
    landlock_enable: bool,
    hypervisor_type: hypervisor::HypervisorType,
    exit_event: EventFd,
) -> Result<thread::JoinHandle<Result<()>>> {
    // Retrieve seccomp filter
    let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    thread::Builder::new()
        .name("event-monitor".to_owned())
        .spawn(move || {
            // Apply seccomp filter
            if !seccomp_filter.is_empty() {
                apply_filter(&seccomp_filter)
                    .map_err(Error::ApplySeccompFilter)
                    .map_err(|e| {
                        error!("Error applying seccomp filter: {:?}", e);
                        // Best-effort notification; the error is still returned.
                        exit_event.write(1).ok();
                        e
                    })?;
            }
            if landlock_enable {
                Landlock::new()
                    .map_err(Error::CreateLandlock)?
                    .restrict_self()
                    .map_err(Error::ApplyLandlock)
                    .map_err(|e| {
                        error!("Error applying landlock to event monitor thread: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }

            // Run the event loop inside catch_unwind so a panic becomes an
            // exit_event notification instead of a silent thread death.
            std::panic::catch_unwind(AssertUnwindSafe(move || {
                // The loop ends once all senders to monitor.rx are dropped.
                while let Ok(event) = monitor.rx.recv() {
                    let event = Arc::new(event);

                    if let Some(ref mut file) = monitor.file {
                        // Events are separated by a blank line; write errors
                        // are deliberately ignored (best-effort logging).
                        file.write_all(event.as_bytes().as_ref()).ok();
                        file.write_all(b"\n\n").ok();
                    }

                    // Forward a shared copy of the event to every subscriber;
                    // a closed receiver is simply skipped.
                    for tx in monitor.broadcast.iter() {
                        tx.send(event.clone()).ok();
                    }
                }
            }))
            .map_err(|_| {
                error!("`event-monitor` thread panicked");
                exit_event.write(1).ok();
            })
            .ok();

            Ok(())
        })
        .map_err(Error::EventMonitorThreadSpawn)
}
399 
/// Spawns the main VMM thread and starts the API front ends.
///
/// The VMM thread applies its seccomp filter, constructs a [`Vmm`], installs
/// the signal handler and then runs the control loop on `api_receiver` (and,
/// with `guest_debug`, on the GDB channel). Once the thread is running, the
/// optional D-Bus API, the HTTP API (served on `http_path` or `http_fd`) and
/// the GDB stub thread (when `debug_path` is given) are started.
///
/// Returns the VMM thread handle together with the handles needed to shut
/// the API servers down, or an error if building a seccomp filter, spawning
/// a thread, or starting a server fails.
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    landlock_enable: bool,
) -> Result<VmmThreadHandle> {
    // GDB resources are cloned up front so they can move into the threads
    // spawned below.
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler(landlock_enable)?;

                // Blocks until the VMM shuts down.
                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started, we can start the dbus thread
    // and start serving HTTP requests
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    // A UNIX socket path takes precedence over an inherited fd; with
    // neither, no HTTP server is started.
    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else {
        None
    };

    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}
531 
// Configuration transferred from the source to the destination VMM at the
// start of a live migration.
#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    // Full configuration of the migrating VM.
    vm_config: Arc<Mutex<VmConfig>>,
    // Source CPUID entries, checked for compatibility on the destination.
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    // Memory manager snapshot data used to rebuild guest memory.
    memory_manager_data: MemoryManagerSnapshotData,
}
539 
/// Build and release version strings reported by the VMM.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    /// Constructs version info from borrowed strings, storing owned copies.
    pub fn new(build_version: &str, version: &str) -> Self {
        Self {
            build_version: String::from(build_version),
            version: String::from(version),
        }
    }
}
554 
/// Handles returned by `start_vmm_thread`: the VMM thread's join handle plus
/// what is needed to shut the API servers down.
pub struct VmmThreadHandle {
    // Join handle of the main VMM thread.
    pub thread_handle: thread::JoinHandle<Result<()>>,
    // Channels to request D-Bus API shutdown, if that server was started.
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    // Handle of the HTTP API server, if one was started.
    pub http_api_handle: Option<HttpApiHandle>,
}
561 
/// The VMM instance: owns the control-loop state, the (optional) running VM
/// and the resources shared with its helper threads.
pub struct Vmm {
    // Epoll context driving the control loop.
    epoll: EpollContext,
    // Written (e.g. by the signal handler) to request VMM shutdown.
    exit_evt: EventFd,
    // Written to request a VM reset/reboot.
    reset_evt: EventFd,
    // Signals that an API request is pending.
    api_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    // Version information reported through the API.
    version: VmmVersionInfo,
    // The running VM, if any.
    vm: Option<Vm>,
    // Configuration of the current (or to-be-created) VM, if any.
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    // Signals that virtio devices should be activated.
    activate_evt: EventFd,
    // Handle to close the signal iterator used by the signal-handler thread.
    signals: Option<Handle>,
    // Join handles of spawned helper threads (e.g. the signal handler).
    threads: Vec<thread::JoinHandle<()>>,
    // Original terminal settings, restored on forced exit (see signal_handler).
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    console_resize_pipe: Option<Arc<File>>,
    console_info: Option<ConsoleInfo>,
}
583 
584 impl Vmm {
    /// Signals intercepted by the VMM for orderly shutdown (see `signal_handler`).
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
586 
    /// Body of the signal-handling thread: waits for SIGTERM/SIGINT and
    /// forwards them to the control loop by writing `exit_evt`.
    ///
    /// If `exit_evt` cannot be written (the control loop side is gone), the
    /// original terminal settings are restored and the process exits with
    /// status 1.
    fn signal_handler(
        mut signals: Signals,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
        exit_evt: &EventFd,
    ) {
        // Make sure the handled signals are unblocked for this thread so the
        // `signals` iterator can observe them.
        for sig in &Self::HANDLED_SIGNALS {
            unblock_signal(*sig).unwrap();
        }

        for signal in signals.forever() {
            match signal {
                SIGTERM | SIGINT => {
                    if exit_evt.write(1).is_err() {
                        // Resetting the terminal is usually done as the VMM exits
                        if let Ok(lock) = original_termios_opt.lock() {
                            if let Some(termios) = *lock {
                                // SAFETY: FFI call
                                let _ = unsafe {
                                    tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
                                };
                            }
                        } else {
                            warn!("Failed to lock original termios");
                        }

                        std::process::exit(1);
                    }
                }
                _ => (),
            }
        }
    }
619 
    /// Spawns the `vmm_signal_handler` thread servicing SIGTERM/SIGINT.
    ///
    /// The thread applies its own seccomp filter and, when `landlock_enable`
    /// is set, a Landlock sandbox, then runs [`Vmm::signal_handler`] inside
    /// `catch_unwind`. Sandboxing failures and panics are reported by writing
    /// `exit_evt`. The join handle is stored in `self.threads` and the
    /// signal-iterator handle in `self.signals`.
    ///
    /// Failure to create the `Signals` iterator is only logged; the method
    /// still returns `Ok(())` in that case.
    fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
        let signals = Signals::new(Self::HANDLED_SIGNALS);
        match signals {
            Ok(signals) => {
                self.signals = Some(signals.handle());
                let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
                let original_termios_opt = Arc::clone(&self.original_termios_opt);

                let signal_handler_seccomp_filter = get_seccomp_filter(
                    &self.seccomp_action,
                    Thread::SignalHandler,
                    self.hypervisor.hypervisor_type(),
                )
                .map_err(Error::CreateSeccompFilter)?;
                self.threads.push(
                    thread::Builder::new()
                        .name("vmm_signal_handler".to_string())
                        .spawn(move || {
                            if !signal_handler_seccomp_filter.is_empty() {
                                if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
                                    .map_err(Error::ApplySeccompFilter)
                                {
                                    error!("Error applying seccomp filter: {:?}", e);
                                    exit_evt.write(1).ok();
                                    return;
                                }
                            }
                            if landlock_enable{
                                match Landlock::new() {
                                    Ok(landlock) => {
                                        let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
                                            error!("Error applying Landlock to signal handler thread: {:?}", e);
                                            exit_evt.write(1).ok();
                                        });
                                    }
                                    Err(e) => {
                                        error!("Error creating Landlock object: {:?}", e);
                                        exit_evt.write(1).ok();
                                    }
                                };
                            }

                            // Turn a panic in the handler into an exit_evt
                            // notification instead of a silent thread death.
                            std::panic::catch_unwind(AssertUnwindSafe(|| {
                                Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
                            }))
                            .map_err(|_| {
                                error!("vmm signal_handler thread panicked");
                                exit_evt.write(1).ok()
                            })
                            .ok();
                        })
                        .map_err(Error::SignalHandlerSpawn)?,
                );
            }
            Err(e) => error!("Signal not found {}", e),
        }
        Ok(())
    }
678 
    /// Builds a `Vmm`: creates the reset and activate event fds and registers
    /// all control event fds with the epoll context used by the control loop.
    ///
    /// No VM is created here; `vm` and `vm_config` start out as `None`.
    #[allow(clippy::too_many_arguments)]
    fn new(
        vmm_version: VmmVersionInfo,
        api_evt: EventFd,
        #[cfg(feature = "guest_debug")] debug_evt: EventFd,
        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
        seccomp_action: SeccompAction,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        exit_evt: EventFd,
    ) -> Result<Self> {
        let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
        let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
        let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;

        // Register each event fd with its dispatch token so the control loop
        // can tell the wakeup sources apart.
        epoll
            .add_event(&exit_evt, EpollDispatch::Exit)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&reset_evt, EpollDispatch::Reset)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&api_evt, EpollDispatch::Api)
            .map_err(Error::Epoll)?;

        #[cfg(feature = "guest_debug")]
        epoll
            .add_event(&debug_evt, EpollDispatch::Debug)
            .map_err(Error::Epoll)?;

        Ok(Vmm {
            epoll,
            exit_evt,
            reset_evt,
            api_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            #[cfg(feature = "guest_debug")]
            vm_debug_evt,
            version: vmm_version,
            vm: None,
            vm_config: None,
            seccomp_action,
            hypervisor,
            activate_evt,
            signals: None,
            threads: vec![],
            original_termios_opt: Arc::new(Mutex::new(None)),
            console_resize_pipe: None,
            console_info: None,
        })
    }
736 
    /// Handles the config phase of an incoming live migration.
    ///
    /// Reads `req.length()` bytes from `socket` and deserializes them into a
    /// `VmMigrationConfig`. The VM configuration is stored on `self`, console
    /// devices are pre-created, Landlock is applied when the config requests
    /// it, and a hypervisor VM plus a `MemoryManager` are rebuilt from the
    /// received memory-manager snapshot data (`existing_memory_files` allows
    /// reusing existing memory backing files).
    ///
    /// On success an OK response is written back to `socket` and the new
    /// memory manager is returned for the subsequent migration phases.
    fn vm_receive_config<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        existing_memory_files: Option<HashMap<u32, File>>,
    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
    where
        T: Read + Write,
    {
        // Read in config data along with memory manager data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;

        let vm_migration_config: VmMigrationConfig =
            serde_json::from_slice(&data).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
            })?;

        // Abort the migration early if the source CPUID cannot be supported
        // on this destination.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(
            &vm_migration_config.vm_config,
            &vm_migration_config.common_cpuid,
        )?;

        let config = vm_migration_config.vm_config.clone();
        self.vm_config = Some(vm_migration_config.vm_config);
        self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
        })?);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
            })?;
        }

        let vm = Vm::create_hypervisor_vm(
            &self.hypervisor,
            #[cfg(feature = "tdx")]
            false,
            #[cfg(feature = "sev_snp")]
            false,
            #[cfg(feature = "sev_snp")]
            config.lock().unwrap().memory.total_size(),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating hypervisor VM from snapshot: {:?}",
                e
            ))
        })?;

        let phys_bits =
            vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);

        // Rebuild guest memory from the snapshot data received from the
        // source, optionally backed by the provided memory files.
        let memory_manager = MemoryManager::new(
            vm,
            &config.lock().unwrap().memory.clone(),
            None,
            phys_bits,
            #[cfg(feature = "tdx")]
            false,
            Some(&vm_migration_config.memory_manager_data),
            existing_memory_files,
            #[cfg(target_arch = "x86_64")]
            None,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating MemoryManager from snapshot: {:?}",
                e
            ))
        })?;

        // Acknowledge this phase so the source can proceed.
        Response::ok().write_to(socket)?;

        Ok(memory_manager)
    }
825 
    /// Handles the state phase of an incoming live migration.
    ///
    /// Reads `req.length()` bytes from `socket`, deserializes them into a
    /// `Snapshot`, rebuilds the VM on top of the previously received memory
    /// manager `mm` and restores it. On success the VM is stored in
    /// `self.vm` and an OK response is written back to `socket`; if the
    /// restore fails an error response is sent instead.
    fn vm_receive_state<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        mm: Arc<Mutex<MemoryManager>>,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + Write,
    {
        // Read in state data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;
        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
        })?;

        // Clone the event fds the new VM needs to share with this VMM.
        let exit_evt = self.exit_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
        })?;
        let reset_evt = self.reset_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
        })?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
        })?;
        let activate_evt = self.activate_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
        })?;

        let timestamp = Instant::now();
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let mut vm = Vm::new_from_memory_manager(
            self.vm_config.clone().unwrap(),
            mm,
            hypervisor_vm,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            timestamp,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
        })?;

        // Create VM
        vm.restore().map_err(|e| {
            // Tell the source the restore failed before propagating the error.
            Response::error().write_to(socket).ok();
            MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
        })?;
        self.vm = Some(vm);

        Response::ok().write_to(socket)?;

        Ok(())
    }
893 
894     fn vm_receive_memory<T>(
895         &mut self,
896         req: &Request,
897         socket: &mut T,
898         memory_manager: &mut MemoryManager,
899     ) -> std::result::Result<(), MigratableError>
900     where
901         T: Read + ReadVolatile + Write,
902     {
903         // Read table
904         let table = MemoryRangeTable::read_from(socket, req.length())?;
905 
906         // And then read the memory itself
907         memory_manager
908             .receive_memory_regions(&table, socket)
909             .inspect_err(|_| {
910                 Response::error().write_to(socket).ok();
911             })?;
912         Response::ok().write_to(socket)?;
913         Ok(())
914     }
915 
916     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
917         url.strip_prefix("unix:")
918             .ok_or_else(|| {
919                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
920             })
921             .map(|s| s.into())
922     }
923 
924     // Returns true if there were dirty pages to send
925     fn vm_maybe_send_dirty_pages<T>(
926         vm: &mut Vm,
927         socket: &mut T,
928     ) -> result::Result<bool, MigratableError>
929     where
930         T: Read + Write + WriteVolatile,
931     {
932         // Send (dirty) memory table
933         let table = vm.dirty_log()?;
934 
935         // But if there are no regions go straight to pause
936         if table.regions().is_empty() {
937             return Ok(false);
938         }
939 
940         Request::memory(table.length()).write_to(socket).unwrap();
941         table.write_to(socket)?;
942         // And then the memory itself
943         vm.send_memory_regions(&table, socket)?;
944         Response::read_from(socket)?.ok_or_abandon(
945             socket,
946             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
947         )?;
948 
949         Ok(true)
950     }
951 
952     fn send_migration(
953         vm: &mut Vm,
954         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
955             dyn hypervisor::Hypervisor,
956         >,
957         send_data_migration: VmSendMigrationData,
958     ) -> result::Result<(), MigratableError> {
959         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
960         let mut socket = UnixStream::connect(path).map_err(|e| {
961             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
962         })?;
963 
964         // Start the migration
965         Request::start().write_to(&mut socket)?;
966         Response::read_from(&mut socket)?.ok_or_abandon(
967             &mut socket,
968             MigratableError::MigrateSend(anyhow!("Error starting migration")),
969         )?;
970 
971         // Send config
972         let vm_config = vm.get_config();
973         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
974         let common_cpuid = {
975             #[cfg(feature = "tdx")]
976             if vm_config.lock().unwrap().is_tdx_enabled() {
977                 return Err(MigratableError::MigrateSend(anyhow!(
978                     "Live Migration is not supported when TDX is enabled"
979                 )));
980             };
981 
982             let amx = vm_config.lock().unwrap().cpus.features.amx;
983             let phys_bits =
984                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
985             arch::generate_common_cpuid(
986                 &hypervisor,
987                 &arch::CpuidConfig {
988                     sgx_epc_sections: None,
989                     phys_bits,
990                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
991                     #[cfg(feature = "tdx")]
992                     tdx: false,
993                     amx,
994                 },
995             )
996             .map_err(|e| {
997                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
998             })?
999         };
1000 
1001         if send_data_migration.local {
1002             vm.send_memory_fds(&mut socket)?;
1003         }
1004 
1005         let vm_migration_config = VmMigrationConfig {
1006             vm_config,
1007             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1008             common_cpuid,
1009             memory_manager_data: vm.memory_manager_data(),
1010         };
1011         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1012         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1013         socket
1014             .write_all(&config_data)
1015             .map_err(MigratableError::MigrateSocket)?;
1016         Response::read_from(&mut socket)?.ok_or_abandon(
1017             &mut socket,
1018             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1019         )?;
1020 
1021         // Let every Migratable object know about the migration being started.
1022         vm.start_migration()?;
1023 
1024         if send_data_migration.local {
1025             // Now pause VM
1026             vm.pause()?;
1027         } else {
1028             // Start logging dirty pages
1029             vm.start_dirty_log()?;
1030 
1031             // Send memory table
1032             let table = vm.memory_range_table()?;
1033             Request::memory(table.length())
1034                 .write_to(&mut socket)
1035                 .unwrap();
1036             table.write_to(&mut socket)?;
1037             // And then the memory itself
1038             vm.send_memory_regions(&table, &mut socket)?;
1039             Response::read_from(&mut socket)?.ok_or_abandon(
1040                 &mut socket,
1041                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1042             )?;
1043 
1044             // Try at most 5 passes of dirty memory sending
1045             const MAX_DIRTY_MIGRATIONS: usize = 5;
1046             for i in 0..MAX_DIRTY_MIGRATIONS {
1047                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1048                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1049                     break;
1050                 }
1051             }
1052 
1053             // Now pause VM
1054             vm.pause()?;
1055 
1056             // Send last batch of dirty pages
1057             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1058 
1059             // Stop logging dirty pages
1060             vm.stop_dirty_log()?;
1061         }
1062         // Capture snapshot and send it
1063         let vm_snapshot = vm.snapshot()?;
1064         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1065         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1066         socket
1067             .write_all(&snapshot_data)
1068             .map_err(MigratableError::MigrateSocket)?;
1069         Response::read_from(&mut socket)?.ok_or_abandon(
1070             &mut socket,
1071             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1072         )?;
1073         // Complete the migration
1074         Request::complete().write_to(&mut socket)?;
1075         Response::read_from(&mut socket)?.ok_or_abandon(
1076             &mut socket,
1077             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1078         )?;
1079 
1080         info!("Migration complete");
1081 
1082         // Let every Migratable object know about the migration being complete
1083         vm.complete_migration()
1084     }
1085 
1086     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1087     fn vm_check_cpuid_compatibility(
1088         &self,
1089         src_vm_config: &Arc<Mutex<VmConfig>>,
1090         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1091     ) -> result::Result<(), MigratableError> {
1092         #[cfg(feature = "tdx")]
1093         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1094             return Err(MigratableError::MigrateReceive(anyhow!(
1095                 "Live Migration is not supported when TDX is enabled"
1096             )));
1097         };
1098 
1099         // We check the `CPUID` compatibility of between the source vm and destination, which is
1100         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1101         let dest_cpuid = &{
1102             let vm_config = &src_vm_config.lock().unwrap();
1103 
1104             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1105             arch::generate_common_cpuid(
1106                 &self.hypervisor.clone(),
1107                 &arch::CpuidConfig {
1108                     sgx_epc_sections: None,
1109                     phys_bits,
1110                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1111                     #[cfg(feature = "tdx")]
1112                     tdx: false,
1113                     amx: vm_config.cpus.features.amx,
1114                 },
1115             )
1116             .map_err(|e| {
1117                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1118             })?
1119         };
1120         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1121             MigratableError::MigrateReceive(anyhow!(
1122                 "Error checking cpu feature compatibility': {:?}",
1123                 e
1124             ))
1125         })
1126     }
1127 
    /// Run the main VMM event loop until shutdown.
    ///
    /// Blocks on epoll and dispatches:
    /// - `Exit`: consume the exit eventfd, shut the VMM down, leave the loop.
    /// - `Reset`: consume the reset eventfd and reboot the VM.
    /// - `ActivateVirtioDevices`: activate any pending virtio devices.
    /// - `Api`: drain the API eventfd and serve requests from `api_receiver`;
    ///   a request returning `true` asks the VMM to leave the loop.
    /// - `Debug` (with `guest_debug`): serve GDB requests from `gdb_receiver`.
    ///
    /// On exit, closes the signal-handler thread's `Signals` iterator and
    /// joins all helper threads before returning.
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        // Only meaningful while a VM exists; otherwise the
                        // eventfd is left unread until one does.
                        if let Some(ref vm) = self.vm {
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            // The handler returns true when the request asks
                            // the VMM to stop (e.g. vmm.shutdown).
                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
1237 }
1238 
1239 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
1240     vm_config.lock().unwrap().apply_landlock()?;
1241     Ok(())
1242 }
1243 
1244 impl RequestHandler for Vmm {
1245     fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> {
1246         // We only store the passed VM config.
1247         // The VM will be created when being asked to boot it.
1248         if self.vm_config.is_none() {
1249             self.vm_config = Some(Arc::new(Mutex::new(*config)));
1250             self.console_info =
1251                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1252 
1253             if self
1254                 .vm_config
1255                 .as_ref()
1256                 .unwrap()
1257                 .lock()
1258                 .unwrap()
1259                 .landlock_enable
1260             {
1261                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1262                     .map_err(VmError::ApplyLandlock)?;
1263             }
1264             Ok(())
1265         } else {
1266             Err(VmError::VmAlreadyCreated)
1267         }
1268     }
1269 
    /// Boot the VM from the stored configuration.
    ///
    /// Re-creates console devices and the `Vm` object if a previous shutdown
    /// cleared them, then boots the VM. Emits the "booting"/"booted" events
    /// and wraps the work in a trace scope.
    fn vm_boot(&mut self) -> result::Result<(), VmError> {
        tracer::start();
        info!("Booting VM");
        event!("vm", "booting");
        let r = {
            trace_scoped!("vm_boot");
            // If we don't have a config, we cannot boot a VM.
            if self.vm_config.is_none() {
                return Err(VmError::VmMissingConfig);
            };

            // console_info is set to None in vm_shutdown. re-populate here if empty
            if self.console_info.is_none() {
                self.console_info =
                    Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
            }

            // Create a new VM if we don't have one yet.
            if self.vm.is_none() {
                // Each Vm gets its own clones of the VMM-owned eventfds.
                let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
                let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
                #[cfg(feature = "guest_debug")]
                let vm_debug_evt = self
                    .vm_debug_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;
                let activate_evt = self
                    .activate_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;

                if let Some(ref vm_config) = self.vm_config {
                    let vm = Vm::new(
                        Arc::clone(vm_config),
                        exit_evt,
                        reset_evt,
                        #[cfg(feature = "guest_debug")]
                        vm_debug_evt,
                        &self.seccomp_action,
                        self.hypervisor.clone(),
                        activate_evt,
                        self.console_info.clone(),
                        self.console_resize_pipe.clone(),
                        Arc::clone(&self.original_termios_opt),
                        // No snapshot, no source URL, no prefault: cold boot
                        // (cf. the Some(..) values passed in vm_restore).
                        None,
                        None,
                        None,
                    )?;

                    self.vm = Some(vm);
                }
            }

            // Now we can boot the VM.
            if let Some(ref mut vm) = self.vm {
                vm.boot()
            } else {
                Err(VmError::VmNotCreated)
            }
        };
        tracer::end();
        if r.is_ok() {
            event!("vm", "booted");
        }
        r
    }
1336 
1337     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1338         if let Some(ref mut vm) = self.vm {
1339             vm.pause().map_err(VmError::Pause)
1340         } else {
1341             Err(VmError::VmNotRunning)
1342         }
1343     }
1344 
1345     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1346         if let Some(ref mut vm) = self.vm {
1347             vm.resume().map_err(VmError::Resume)
1348         } else {
1349             Err(VmError::VmNotRunning)
1350         }
1351     }
1352 
1353     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1354         if let Some(ref mut vm) = self.vm {
1355             // Drain console_info so that FDs are not reused
1356             let _ = self.console_info.take();
1357             vm.snapshot()
1358                 .map_err(VmError::Snapshot)
1359                 .and_then(|snapshot| {
1360                     vm.send(&snapshot, destination_url)
1361                         .map_err(VmError::SnapshotSend)
1362                 })
1363         } else {
1364             Err(VmError::VmNotRunning)
1365         }
1366     }
1367 
    /// Restore a VM from a snapshot located at `restore_cfg.source_url`.
    ///
    /// Reads the saved config and state, validates the restore request
    /// (including CPUID compatibility on KVM/x86_64), rebuilds console
    /// devices and the `Vm` object from the snapshot, optionally applies
    /// Landlock, and finally restores the remaining VM state.
    fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
        // Restoring over an existing VM (or stored config) is not allowed.
        if self.vm.is_some() || self.vm_config.is_some() {
            return Err(VmError::VmAlreadyCreated);
        }

        let source_url = restore_cfg.source_url.as_path().to_str();
        if source_url.is_none() {
            return Err(VmError::InvalidRestoreSourceUrl);
        }
        // Safe to unwrap as we checked it was Some(&str).
        let source_url = source_url.unwrap();

        let vm_config = Arc::new(Mutex::new(
            recv_vm_config(source_url).map_err(VmError::Restore)?,
        ));
        restore_cfg
            .validate(&vm_config.lock().unwrap().clone())
            .map_err(VmError::ConfigValidation)?;

        // Update VM's net configurations with new fds received for restore operation
        if let (Some(restored_nets), Some(vm_net_configs)) =
            (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
        {
            for net in restored_nets.iter() {
                for net_config in vm_net_configs.iter_mut() {
                    // update only if the net dev is backed by FDs
                    if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
                        net_config.fds.clone_from(&net.fds);
                    }
                }
            }
        }

        let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;

        // Refuse to restore a snapshot whose source CPUID features this host
        // cannot provide.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
            .map_err(VmError::Restore)?;

        self.vm_config = Some(Arc::clone(&vm_config));

        // console_info is set to None in vm_snapshot. re-populate here if empty
        if self.console_info.is_none() {
            self.console_info =
                Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
        }

        // Each Vm gets its own clones of the VMM-owned eventfds.
        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        let vm = Vm::new(
            vm_config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
            Some(source_url),
            Some(restore_cfg.prefault),
        )?;
        self.vm = Some(vm);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone())
                .map_err(VmError::ApplyLandlock)?;
        }

        // Now we can restore the rest of the VM.
        if let Some(ref mut vm) = self.vm {
            vm.restore()
        } else {
            Err(VmError::VmNotCreated)
        }
    }
1466 
1467     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1468     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1469         if let Some(ref mut vm) = self.vm {
1470             vm.coredump(destination_url).map_err(VmError::Coredump)
1471         } else {
1472             Err(VmError::VmNotRunning)
1473         }
1474     }
1475 
1476     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1477         let r = if let Some(ref mut vm) = self.vm.take() {
1478             // Drain console_info so that the FDs are not reused
1479             let _ = self.console_info.take();
1480             vm.shutdown()
1481         } else {
1482             Err(VmError::VmNotRunning)
1483         };
1484 
1485         if r.is_ok() {
1486             event!("vm", "shutdown");
1487         }
1488 
1489         r
1490     }
1491 
    /// Reboot the VM by shutting down the current instance and creating and
    /// booting a fresh `Vm` from the same configuration.
    fn vm_reboot(&mut self) -> result::Result<(), VmError> {
        event!("vm", "rebooting");

        // First we stop the current VM
        let config = if let Some(mut vm) = self.vm.take() {
            let config = vm.get_config();
            vm.shutdown()?;
            config
        } else {
            return Err(VmError::VmNotCreated);
        };

        // vm.shutdown() closes all the console devices, so set console_info to None
        // so that the closed FD #s are not reused.
        let _ = self.console_info.take();

        // Each Vm gets its own clones of the VMM-owned eventfds.
        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
        // an event sitting in the shared reset_evt. Without doing this we get very early reboots
        // during the boot process.
        if self.reset_evt.read().is_ok() {
            warn!("Spurious second reset event received. Ignoring.");
        }

        self.console_info =
            Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);

        // Then we create the new VM
        let mut vm = Vm::new(
            config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            // No snapshot, no source URL, no prefault: this is a fresh boot.
            None,
            None,
            None,
        )?;

        // And we boot it
        vm.boot()?;

        self.vm = Some(vm);

        event!("vm", "rebooted");

        Ok(())
    }
1557 
1558     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1559         match &self.vm_config {
1560             Some(vm_config) => {
1561                 let state = match &self.vm {
1562                     Some(vm) => vm.get_state()?,
1563                     None => VmState::Created,
1564                 };
1565                 let config = vm_config.lock().unwrap().clone();
1566 
1567                 let mut memory_actual_size = config.memory.total_size();
1568                 if let Some(vm) = &self.vm {
1569                     memory_actual_size -= vm.balloon_size();
1570                 }
1571 
1572                 let device_tree = self
1573                     .vm
1574                     .as_ref()
1575                     .map(|vm| vm.device_tree().lock().unwrap().clone());
1576 
1577                 Ok(VmInfoResponse {
1578                     config: Box::new(config),
1579                     state,
1580                     memory_actual_size,
1581                     device_tree,
1582                 })
1583             }
1584             None => Err(VmError::VmNotCreated),
1585         }
1586     }
1587 
1588     fn vmm_ping(&self) -> VmmPingResponse {
1589         let VmmVersionInfo {
1590             build_version,
1591             version,
1592         } = self.version.clone();
1593 
1594         VmmPingResponse {
1595             build_version,
1596             version,
1597             pid: std::process::id() as i64,
1598             features: feature_list(),
1599         }
1600     }
1601 
1602     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1603         if self.vm_config.is_none() {
1604             return Ok(());
1605         }
1606 
1607         // If a VM is booted, we first try to shut it down.
1608         if self.vm.is_some() {
1609             self.vm_shutdown()?;
1610         }
1611 
1612         self.vm_config = None;
1613 
1614         event!("vm", "deleted");
1615 
1616         Ok(())
1617     }
1618 
1619     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1620         self.vm_delete()?;
1621         event!("vmm", "shutdown");
1622         Ok(())
1623     }
1624 
1625     fn vm_resize(
1626         &mut self,
1627         desired_vcpus: Option<u8>,
1628         desired_ram: Option<u64>,
1629         desired_balloon: Option<u64>,
1630     ) -> result::Result<(), VmError> {
1631         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1632 
1633         if let Some(ref mut vm) = self.vm {
1634             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1635                 error!("Error when resizing VM: {:?}", e);
1636                 Err(e)
1637             } else {
1638                 Ok(())
1639             }
1640         } else {
1641             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1642             if let Some(desired_vcpus) = desired_vcpus {
1643                 config.cpus.boot_vcpus = desired_vcpus;
1644             }
1645             if let Some(desired_ram) = desired_ram {
1646                 config.memory.size = desired_ram;
1647             }
1648             if let Some(desired_balloon) = desired_balloon {
1649                 if let Some(balloon_config) = &mut config.balloon {
1650                     balloon_config.size = desired_balloon;
1651                 }
1652             }
1653             Ok(())
1654         }
1655     }
1656 
1657     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1658         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1659 
1660         if let Some(ref mut vm) = self.vm {
1661             if let Err(e) = vm.resize_zone(id, desired_ram) {
1662                 error!("Error when resizing VM: {:?}", e);
1663                 Err(e)
1664             } else {
1665                 Ok(())
1666             }
1667         } else {
1668             // Update VmConfig by setting the new desired ram.
1669             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1670 
1671             if let Some(zones) = &mut memory_config.zones {
1672                 for zone in zones.iter_mut() {
1673                     if zone.id == id {
1674                         zone.size = desired_ram;
1675                         return Ok(());
1676                     }
1677                 }
1678             }
1679 
1680             error!("Could not find the memory zone {} for the resize", id);
1681             Err(VmError::ResizeZone)
1682         }
1683     }
1684 
1685     fn vm_add_device(
1686         &mut self,
1687         device_cfg: DeviceConfig,
1688     ) -> result::Result<Option<Vec<u8>>, VmError> {
1689         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1690 
1691         {
1692             // Validate the configuration change in a cloned configuration
1693             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1694             add_to_config(&mut config.devices, device_cfg.clone());
1695             config.validate().map_err(VmError::ConfigValidation)?;
1696         }
1697 
1698         if let Some(ref mut vm) = self.vm {
1699             let info = vm.add_device(device_cfg).map_err(|e| {
1700                 error!("Error when adding new device to the VM: {:?}", e);
1701                 e
1702             })?;
1703             serde_json::to_vec(&info)
1704                 .map(Some)
1705                 .map_err(VmError::SerializeJson)
1706         } else {
1707             // Update VmConfig by adding the new device.
1708             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1709             add_to_config(&mut config.devices, device_cfg);
1710             Ok(None)
1711         }
1712     }
1713 
1714     fn vm_add_user_device(
1715         &mut self,
1716         device_cfg: UserDeviceConfig,
1717     ) -> result::Result<Option<Vec<u8>>, VmError> {
1718         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1719 
1720         {
1721             // Validate the configuration change in a cloned configuration
1722             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1723             add_to_config(&mut config.user_devices, device_cfg.clone());
1724             config.validate().map_err(VmError::ConfigValidation)?;
1725         }
1726 
1727         if let Some(ref mut vm) = self.vm {
1728             let info = vm.add_user_device(device_cfg).map_err(|e| {
1729                 error!("Error when adding new user device to the VM: {:?}", e);
1730                 e
1731             })?;
1732             serde_json::to_vec(&info)
1733                 .map(Some)
1734                 .map_err(VmError::SerializeJson)
1735         } else {
1736             // Update VmConfig by adding the new device.
1737             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1738             add_to_config(&mut config.user_devices, device_cfg);
1739             Ok(None)
1740         }
1741     }
1742 
1743     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1744         if let Some(ref mut vm) = self.vm {
1745             if let Err(e) = vm.remove_device(id) {
1746                 error!("Error when removing device from the VM: {:?}", e);
1747                 Err(e)
1748             } else {
1749                 Ok(())
1750             }
1751         } else if let Some(ref config) = self.vm_config {
1752             let mut config = config.lock().unwrap();
1753             if config.remove_device(&id) {
1754                 Ok(())
1755             } else {
1756                 Err(VmError::NoDeviceToRemove(id))
1757             }
1758         } else {
1759             Err(VmError::VmNotCreated)
1760         }
1761     }
1762 
1763     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1764         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1765 
1766         {
1767             // Validate the configuration change in a cloned configuration
1768             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1769             add_to_config(&mut config.disks, disk_cfg.clone());
1770             config.validate().map_err(VmError::ConfigValidation)?;
1771         }
1772 
1773         if let Some(ref mut vm) = self.vm {
1774             let info = vm.add_disk(disk_cfg).map_err(|e| {
1775                 error!("Error when adding new disk to the VM: {:?}", e);
1776                 e
1777             })?;
1778             serde_json::to_vec(&info)
1779                 .map(Some)
1780                 .map_err(VmError::SerializeJson)
1781         } else {
1782             // Update VmConfig by adding the new device.
1783             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1784             add_to_config(&mut config.disks, disk_cfg);
1785             Ok(None)
1786         }
1787     }
1788 
1789     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1790         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1791 
1792         {
1793             // Validate the configuration change in a cloned configuration
1794             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1795             add_to_config(&mut config.fs, fs_cfg.clone());
1796             config.validate().map_err(VmError::ConfigValidation)?;
1797         }
1798 
1799         if let Some(ref mut vm) = self.vm {
1800             let info = vm.add_fs(fs_cfg).map_err(|e| {
1801                 error!("Error when adding new fs to the VM: {:?}", e);
1802                 e
1803             })?;
1804             serde_json::to_vec(&info)
1805                 .map(Some)
1806                 .map_err(VmError::SerializeJson)
1807         } else {
1808             // Update VmConfig by adding the new device.
1809             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1810             add_to_config(&mut config.fs, fs_cfg);
1811             Ok(None)
1812         }
1813     }
1814 
1815     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1816         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1817 
1818         {
1819             // Validate the configuration change in a cloned configuration
1820             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1821             add_to_config(&mut config.pmem, pmem_cfg.clone());
1822             config.validate().map_err(VmError::ConfigValidation)?;
1823         }
1824 
1825         if let Some(ref mut vm) = self.vm {
1826             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1827                 error!("Error when adding new pmem device to the VM: {:?}", e);
1828                 e
1829             })?;
1830             serde_json::to_vec(&info)
1831                 .map(Some)
1832                 .map_err(VmError::SerializeJson)
1833         } else {
1834             // Update VmConfig by adding the new device.
1835             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1836             add_to_config(&mut config.pmem, pmem_cfg);
1837             Ok(None)
1838         }
1839     }
1840 
1841     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1842         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1843 
1844         {
1845             // Validate the configuration change in a cloned configuration
1846             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1847             add_to_config(&mut config.net, net_cfg.clone());
1848             config.validate().map_err(VmError::ConfigValidation)?;
1849         }
1850 
1851         if let Some(ref mut vm) = self.vm {
1852             let info = vm.add_net(net_cfg).map_err(|e| {
1853                 error!("Error when adding new network device to the VM: {:?}", e);
1854                 e
1855             })?;
1856             serde_json::to_vec(&info)
1857                 .map(Some)
1858                 .map_err(VmError::SerializeJson)
1859         } else {
1860             // Update VmConfig by adding the new device.
1861             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1862             add_to_config(&mut config.net, net_cfg);
1863             Ok(None)
1864         }
1865     }
1866 
1867     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1868         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1869 
1870         {
1871             // Validate the configuration change in a cloned configuration
1872             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1873             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1874             config.validate().map_err(VmError::ConfigValidation)?;
1875         }
1876 
1877         if let Some(ref mut vm) = self.vm {
1878             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1879                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1880                 e
1881             })?;
1882             serde_json::to_vec(&info)
1883                 .map(Some)
1884                 .map_err(VmError::SerializeJson)
1885         } else {
1886             // Update VmConfig by adding the new device.
1887             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1888             add_to_config(&mut config.vdpa, vdpa_cfg);
1889             Ok(None)
1890         }
1891     }
1892 
1893     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1894         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1895 
1896         {
1897             // Validate the configuration change in a cloned configuration
1898             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1899 
1900             if config.vsock.is_some() {
1901                 return Err(VmError::TooManyVsockDevices);
1902             }
1903 
1904             config.vsock = Some(vsock_cfg.clone());
1905             config.validate().map_err(VmError::ConfigValidation)?;
1906         }
1907 
1908         if let Some(ref mut vm) = self.vm {
1909             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1910                 error!("Error when adding new vsock device to the VM: {:?}", e);
1911                 e
1912             })?;
1913             serde_json::to_vec(&info)
1914                 .map(Some)
1915                 .map_err(VmError::SerializeJson)
1916         } else {
1917             // Update VmConfig by adding the new device.
1918             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1919             config.vsock = Some(vsock_cfg);
1920             Ok(None)
1921         }
1922     }
1923 
1924     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1925         if let Some(ref mut vm) = self.vm {
1926             let info = vm.counters().map_err(|e| {
1927                 error!("Error when getting counters from the VM: {:?}", e);
1928                 e
1929             })?;
1930             serde_json::to_vec(&info)
1931                 .map(Some)
1932                 .map_err(VmError::SerializeJson)
1933         } else {
1934             Err(VmError::VmNotRunning)
1935         }
1936     }
1937 
1938     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1939         if let Some(ref mut vm) = self.vm {
1940             vm.power_button()
1941         } else {
1942             Err(VmError::VmNotRunning)
1943         }
1944     }
1945 
1946     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1947         if let Some(ref mut vm) = self.vm {
1948             vm.nmi()
1949         } else {
1950             Err(VmError::VmNotRunning)
1951         }
1952     }
1953 
    /// Receiving end of a VM migration.
    ///
    /// Binds a UNIX socket at `receiver_url`, accepts a single connection
    /// from the sender, then processes migration protocol commands until a
    /// `Complete` or `Abandon` command ends the session.
    ///
    /// Protocol ordering enforced here: `Start` must come first; `Config`
    /// creates the memory manager; `State`/`Memory` require that manager;
    /// `MemoryFd` (local migration) may arrive before `Config` to supply
    /// memory file descriptors.
    fn vm_receive_migration(
        &mut self,
        receive_data_migration: VmReceiveMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Receiving migration: receiver_url = {}",
            receive_data_migration.receiver_url
        );

        // Listen on the given UNIX socket path, accept exactly one sender
        // connection, then unlink the path so it cannot be reused.
        let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
        let listener = UnixListener::bind(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
        })?;
        let (mut socket, _addr) = listener.accept().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
        })?;
        std::fs::remove_file(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
        })?;

        // Session state: `started` flips on Start; `memory_manager` is
        // produced by Config and consumed by State; `existing_memory_files`
        // maps memory slot -> fd received via MemoryFd commands.
        let mut started = false;
        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
        let mut existing_memory_files = None;
        loop {
            let req = Request::read_from(&mut socket)?;
            match req.command() {
                Command::Invalid => info!("Invalid Command Received"),
                Command::Start => {
                    info!("Start Command Received");
                    started = true;

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Config => {
                    info!("Config Command Received");

                    // Reject out-of-order commands but keep the session
                    // alive: report an error response and wait for more.
                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    // `take()` hands any pre-received memory fds over to the
                    // memory manager being built from the incoming config.
                    memory_manager = Some(self.vm_receive_config(
                        &req,
                        &mut socket,
                        existing_memory_files.take(),
                    )?);
                }
                Command::State => {
                    info!("State Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    // State consumes the memory manager (moved by `take()`);
                    // it must have been created by a prior Config command.
                    if let Some(mm) = memory_manager.take() {
                        self.vm_receive_state(&req, &mut socket, mm)?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::Memory => {
                    info!("Memory Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    // Memory only borrows the manager (`as_ref()`), since
                    // multiple Memory commands may arrive before State.
                    if let Some(mm) = memory_manager.as_ref() {
                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::MemoryFd => {
                    info!("MemoryFd Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }

                    // The sender passes a memory file descriptor alongside a
                    // 4-byte little-endian slot number.
                    let mut buf = [0u8; 4];
                    let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
                        MigratableError::MigrateReceive(anyhow!(
                            "Error receiving slot from socket: {}",
                            e
                        ))
                    })?;

                    // Lazily create the slot -> file map on first MemoryFd.
                    if existing_memory_files.is_none() {
                        existing_memory_files = Some(HashMap::default())
                    }

                    if let Some(ref mut existing_memory_files) = existing_memory_files {
                        let slot = u32::from_le_bytes(buf);
                        existing_memory_files.insert(slot, file.unwrap());
                    }

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Complete => {
                    info!("Complete Command Received");
                    // The restored VM was created paused; resume it now that
                    // the sender has confirmed the migration is complete.
                    if let Some(ref mut vm) = self.vm.as_mut() {
                        vm.resume()?;
                        Response::ok().write_to(&mut socket)?;
                    } else {
                        warn!("VM not created yet");
                        Response::error().write_to(&mut socket)?;
                    }
                    break;
                }
                Command::Abandon => {
                    info!("Abandon Command Received");
                    // Drop any partially restored VM and its configuration.
                    // The response is best-effort (`.ok()`): the sender may
                    // already be gone.
                    self.vm = None;
                    self.vm_config = None;
                    Response::ok().write_to(&mut socket).ok();
                    break;
                }
            }
        }

        Ok(())
    }
2082 
2083     fn vm_send_migration(
2084         &mut self,
2085         send_data_migration: VmSendMigrationData,
2086     ) -> result::Result<(), MigratableError> {
2087         info!(
2088             "Sending migration: destination_url = {}, local = {}",
2089             send_data_migration.destination_url, send_data_migration.local
2090         );
2091 
2092         if !self
2093             .vm_config
2094             .as_ref()
2095             .unwrap()
2096             .lock()
2097             .unwrap()
2098             .backed_by_shared_memory()
2099             && send_data_migration.local
2100         {
2101             return Err(MigratableError::MigrateSend(anyhow!(
2102                 "Local migration requires shared memory or hugepages enabled"
2103             )));
2104         }
2105 
2106         if let Some(vm) = self.vm.as_mut() {
2107             Self::send_migration(
2108                 vm,
2109                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2110                 self.hypervisor.clone(),
2111                 send_data_migration,
2112             )
2113             .map_err(|migration_err| {
2114                 error!("Migration failed: {:?}", migration_err);
2115 
2116                 // Stop logging dirty pages
2117                 if let Err(e) = vm.stop_dirty_log() {
2118                     return e;
2119                 }
2120 
2121                 if vm.get_state().unwrap() == VmState::Paused {
2122                     if let Err(e) = vm.resume() {
2123                         return e;
2124                     }
2125                 }
2126 
2127                 migration_err
2128             })?;
2129 
2130             // Shutdown the VM after the migration succeeded
2131             self.exit_evt.write(1).map_err(|e| {
2132                 MigratableError::MigrateSend(anyhow!(
2133                     "Failed shutting down the VM after migration: {:?}",
2134                     e
2135                 ))
2136             })
2137         } else {
2138             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2139         }
2140     }
2141 }
2142 
// String identifiers for the CPU, memory and device managers; by their
// names these key the corresponding snapshot sections — confirm at the
// `Snapshottable` usage sites elsewhere in this file.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2146 
2147 #[cfg(test)]
2148 mod unit_tests {
2149     use super::*;
2150     #[cfg(target_arch = "x86_64")]
2151     use crate::vm_config::DebugConsoleConfig;
2152     use crate::vm_config::{
2153         ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig,
2154         PayloadConfig, RngConfig,
2155     };
2156 
    /// Builds a minimal `Vmm` for unit tests: dummy version info, fresh
    /// non-blocking event fds, a permissive seccomp action and the default
    /// hypervisor. Panics (unwrap) if the hypervisor cannot be opened, so
    /// these tests require a usable hypervisor on the host.
    fn create_dummy_vmm() -> Vmm {
        Vmm::new(
            VmmVersionInfo::new("dummy", "dummy"),
            EventFd::new(EFD_NONBLOCK).unwrap(),
            // The two event fds below only exist with guest debugging built in.
            #[cfg(feature = "guest_debug")]
            EventFd::new(EFD_NONBLOCK).unwrap(),
            #[cfg(feature = "guest_debug")]
            EventFd::new(EFD_NONBLOCK).unwrap(),
            SeccompAction::Allow,
            hypervisor::new().unwrap(),
            EventFd::new(EFD_NONBLOCK).unwrap(),
        )
        .unwrap()
    }
2171 
    /// Builds a minimal, boot-plausible `VmConfig` for unit tests: one vCPU,
    /// 512 MiB of shared memory, a kernel payload path and no optional
    /// devices. Boxed because `vm_create` takes `Box<VmConfig>`.
    fn create_dummy_vm_config() -> Box<VmConfig> {
        Box::new(VmConfig {
            cpus: CpusConfig {
                boot_vcpus: 1,
                max_vcpus: 1,
                topology: None,
                kvm_hyperv: false,
                max_phys_bits: 46,
                affinity: None,
                features: CpuFeatures::default(),
            },
            memory: MemoryConfig {
                // 512 MiB; `shared: true` so tests relying on shared-memory
                // backing (e.g. local migration checks) are satisfied.
                size: 536_870_912,
                mergeable: false,
                hotplug_method: HotplugMethod::Acpi,
                hotplug_size: None,
                hotplugged_size: None,
                shared: true,
                hugepages: false,
                hugepage_size: None,
                prefault: false,
                zones: None,
                thp: true,
            },
            payload: Some(PayloadConfig {
                kernel: Some(PathBuf::from("/path/to/kernel")),
                firmware: None,
                cmdline: None,
                initramfs: None,
                #[cfg(feature = "igvm")]
                igvm: None,
                #[cfg(feature = "sev_snp")]
                host_data: None,
            }),
            rate_limit_groups: None,
            disks: None,
            net: None,
            rng: RngConfig {
                src: PathBuf::from("/dev/urandom"),
                iommu: false,
            },
            balloon: None,
            fs: None,
            pmem: None,
            serial: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Null,
                iommu: false,
                socket: None,
            },
            console: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Tty,
                iommu: false,
                socket: None,
            },
            #[cfg(target_arch = "x86_64")]
            debug_console: DebugConsoleConfig::default(),
            devices: None,
            user_devices: None,
            vdpa: None,
            vsock: None,
            #[cfg(feature = "pvmemcontrol")]
            pvmemcontrol: None,
            pvpanic: false,
            iommu: false,
            #[cfg(target_arch = "x86_64")]
            sgx_epc: None,
            numa: None,
            watchdog: false,
            #[cfg(feature = "guest_debug")]
            gdb: false,
            pci_segments: None,
            platform: None,
            tpm: None,
            preserved_fds: None,
            landlock_enable: false,
            landlock_rules: None,
        })
    }
2252 
2253     #[test]
2254     fn test_vmm_vm_create() {
2255         let mut vmm = create_dummy_vmm();
2256         let config = create_dummy_vm_config();
2257 
2258         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2259         assert!(matches!(
2260             vmm.vm_create(config),
2261             Err(VmError::VmAlreadyCreated)
2262         ));
2263     }
2264 
2265     #[test]
2266     fn test_vmm_vm_cold_add_device() {
2267         let mut vmm = create_dummy_vmm();
2268         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2269 
2270         assert!(matches!(
2271             vmm.vm_add_device(device_config.clone()),
2272             Err(VmError::VmNotCreated)
2273         ));
2274 
2275         let _ = vmm.vm_create(create_dummy_vm_config());
2276         assert!(vmm
2277             .vm_config
2278             .as_ref()
2279             .unwrap()
2280             .lock()
2281             .unwrap()
2282             .devices
2283             .is_none());
2284 
2285         assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none());
2286         assert_eq!(
2287             vmm.vm_config
2288                 .as_ref()
2289                 .unwrap()
2290                 .lock()
2291                 .unwrap()
2292                 .devices
2293                 .clone()
2294                 .unwrap()
2295                 .len(),
2296             1
2297         );
2298         assert_eq!(
2299             vmm.vm_config
2300                 .as_ref()
2301                 .unwrap()
2302                 .lock()
2303                 .unwrap()
2304                 .devices
2305                 .clone()
2306                 .unwrap()[0],
2307             device_config
2308         );
2309     }
2310 
2311     #[test]
2312     fn test_vmm_vm_cold_add_user_device() {
2313         let mut vmm = create_dummy_vmm();
2314         let user_device_config =
2315             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2316 
2317         assert!(matches!(
2318             vmm.vm_add_user_device(user_device_config.clone()),
2319             Err(VmError::VmNotCreated)
2320         ));
2321 
2322         let _ = vmm.vm_create(create_dummy_vm_config());
2323         assert!(vmm
2324             .vm_config
2325             .as_ref()
2326             .unwrap()
2327             .lock()
2328             .unwrap()
2329             .user_devices
2330             .is_none());
2331 
2332         assert!(vmm
2333             .vm_add_user_device(user_device_config.clone())
2334             .unwrap()
2335             .is_none());
2336         assert_eq!(
2337             vmm.vm_config
2338                 .as_ref()
2339                 .unwrap()
2340                 .lock()
2341                 .unwrap()
2342                 .user_devices
2343                 .clone()
2344                 .unwrap()
2345                 .len(),
2346             1
2347         );
2348         assert_eq!(
2349             vmm.vm_config
2350                 .as_ref()
2351                 .unwrap()
2352                 .lock()
2353                 .unwrap()
2354                 .user_devices
2355                 .clone()
2356                 .unwrap()[0],
2357             user_device_config
2358         );
2359     }
2360 
2361     #[test]
2362     fn test_vmm_vm_cold_add_disk() {
2363         let mut vmm = create_dummy_vmm();
2364         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2365 
2366         assert!(matches!(
2367             vmm.vm_add_disk(disk_config.clone()),
2368             Err(VmError::VmNotCreated)
2369         ));
2370 
2371         let _ = vmm.vm_create(create_dummy_vm_config());
2372         assert!(vmm
2373             .vm_config
2374             .as_ref()
2375             .unwrap()
2376             .lock()
2377             .unwrap()
2378             .disks
2379             .is_none());
2380 
2381         assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none());
2382         assert_eq!(
2383             vmm.vm_config
2384                 .as_ref()
2385                 .unwrap()
2386                 .lock()
2387                 .unwrap()
2388                 .disks
2389                 .clone()
2390                 .unwrap()
2391                 .len(),
2392             1
2393         );
2394         assert_eq!(
2395             vmm.vm_config
2396                 .as_ref()
2397                 .unwrap()
2398                 .lock()
2399                 .unwrap()
2400                 .disks
2401                 .clone()
2402                 .unwrap()[0],
2403             disk_config
2404         );
2405     }
2406 
2407     #[test]
2408     fn test_vmm_vm_cold_add_fs() {
2409         let mut vmm = create_dummy_vmm();
2410         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2411 
2412         assert!(matches!(
2413             vmm.vm_add_fs(fs_config.clone()),
2414             Err(VmError::VmNotCreated)
2415         ));
2416 
2417         let _ = vmm.vm_create(create_dummy_vm_config());
2418         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2419 
2420         assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none());
2421         assert_eq!(
2422             vmm.vm_config
2423                 .as_ref()
2424                 .unwrap()
2425                 .lock()
2426                 .unwrap()
2427                 .fs
2428                 .clone()
2429                 .unwrap()
2430                 .len(),
2431             1
2432         );
2433         assert_eq!(
2434             vmm.vm_config
2435                 .as_ref()
2436                 .unwrap()
2437                 .lock()
2438                 .unwrap()
2439                 .fs
2440                 .clone()
2441                 .unwrap()[0],
2442             fs_config
2443         );
2444     }
2445 
2446     #[test]
2447     fn test_vmm_vm_cold_add_pmem() {
2448         let mut vmm = create_dummy_vmm();
2449         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2450 
2451         assert!(matches!(
2452             vmm.vm_add_pmem(pmem_config.clone()),
2453             Err(VmError::VmNotCreated)
2454         ));
2455 
2456         let _ = vmm.vm_create(create_dummy_vm_config());
2457         assert!(vmm
2458             .vm_config
2459             .as_ref()
2460             .unwrap()
2461             .lock()
2462             .unwrap()
2463             .pmem
2464             .is_none());
2465 
2466         assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none());
2467         assert_eq!(
2468             vmm.vm_config
2469                 .as_ref()
2470                 .unwrap()
2471                 .lock()
2472                 .unwrap()
2473                 .pmem
2474                 .clone()
2475                 .unwrap()
2476                 .len(),
2477             1
2478         );
2479         assert_eq!(
2480             vmm.vm_config
2481                 .as_ref()
2482                 .unwrap()
2483                 .lock()
2484                 .unwrap()
2485                 .pmem
2486                 .clone()
2487                 .unwrap()[0],
2488             pmem_config
2489         );
2490     }
2491 
2492     #[test]
2493     fn test_vmm_vm_cold_add_net() {
2494         let mut vmm = create_dummy_vmm();
2495         let net_config = NetConfig::parse(
2496             "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
2497         )
2498         .unwrap();
2499 
2500         assert!(matches!(
2501             vmm.vm_add_net(net_config.clone()),
2502             Err(VmError::VmNotCreated)
2503         ));
2504 
2505         let _ = vmm.vm_create(create_dummy_vm_config());
2506         assert!(vmm
2507             .vm_config
2508             .as_ref()
2509             .unwrap()
2510             .lock()
2511             .unwrap()
2512             .net
2513             .is_none());
2514 
2515         assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none());
2516         assert_eq!(
2517             vmm.vm_config
2518                 .as_ref()
2519                 .unwrap()
2520                 .lock()
2521                 .unwrap()
2522                 .net
2523                 .clone()
2524                 .unwrap()
2525                 .len(),
2526             1
2527         );
2528         assert_eq!(
2529             vmm.vm_config
2530                 .as_ref()
2531                 .unwrap()
2532                 .lock()
2533                 .unwrap()
2534                 .net
2535                 .clone()
2536                 .unwrap()[0],
2537             net_config
2538         );
2539     }
2540 
2541     #[test]
2542     fn test_vmm_vm_cold_add_vdpa() {
2543         let mut vmm = create_dummy_vmm();
2544         let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();
2545 
2546         assert!(matches!(
2547             vmm.vm_add_vdpa(vdpa_config.clone()),
2548             Err(VmError::VmNotCreated)
2549         ));
2550 
2551         let _ = vmm.vm_create(create_dummy_vm_config());
2552         assert!(vmm
2553             .vm_config
2554             .as_ref()
2555             .unwrap()
2556             .lock()
2557             .unwrap()
2558             .vdpa
2559             .is_none());
2560 
2561         assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none());
2562         assert_eq!(
2563             vmm.vm_config
2564                 .as_ref()
2565                 .unwrap()
2566                 .lock()
2567                 .unwrap()
2568                 .vdpa
2569                 .clone()
2570                 .unwrap()
2571                 .len(),
2572             1
2573         );
2574         assert_eq!(
2575             vmm.vm_config
2576                 .as_ref()
2577                 .unwrap()
2578                 .lock()
2579                 .unwrap()
2580                 .vdpa
2581                 .clone()
2582                 .unwrap()[0],
2583             vdpa_config
2584         );
2585     }
2586 
2587     #[test]
2588     fn test_vmm_vm_cold_add_vsock() {
2589         let mut vmm = create_dummy_vmm();
2590         let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();
2591 
2592         assert!(matches!(
2593             vmm.vm_add_vsock(vsock_config.clone()),
2594             Err(VmError::VmNotCreated)
2595         ));
2596 
2597         let _ = vmm.vm_create(create_dummy_vm_config());
2598         assert!(vmm
2599             .vm_config
2600             .as_ref()
2601             .unwrap()
2602             .lock()
2603             .unwrap()
2604             .vsock
2605             .is_none());
2606 
2607         assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none());
2608         assert_eq!(
2609             vmm.vm_config
2610                 .as_ref()
2611                 .unwrap()
2612                 .lock()
2613                 .unwrap()
2614                 .vsock
2615                 .clone()
2616                 .unwrap(),
2617             vsock_config
2618         );
2619     }
2620 }
2621