xref: /cloud-hypervisor/vmm/src/lib.rs (revision 61e57e1cb149de03ae1e0b799b9e5ba9a4a63ace)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use std::collections::HashMap;
12 use std::fs::File;
13 use std::io::{stdout, Read, Write};
14 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
15 use std::os::unix::net::{UnixListener, UnixStream};
16 use std::panic::AssertUnwindSafe;
17 use std::path::PathBuf;
18 use std::rc::Rc;
19 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
20 use std::sync::{Arc, Mutex};
21 use std::time::Instant;
22 use std::{io, result, thread};
23 
24 use anyhow::anyhow;
25 #[cfg(feature = "dbus_api")]
26 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
27 use api::http::HttpApiHandle;
28 use console_devices::{pre_create_console_devices, ConsoleInfo};
29 use landlock::LandlockError;
30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
31 use memory_manager::MemoryManagerSnapshotData;
32 use pci::PciBdf;
33 use seccompiler::{apply_filter, SeccompAction};
34 use serde::ser::{SerializeStruct, Serializer};
35 use serde::{Deserialize, Serialize};
36 use signal_hook::iterator::{Handle, Signals};
37 use thiserror::Error;
38 use tracer::trace_scoped;
39 use vm_memory::bitmap::AtomicBitmap;
40 use vm_memory::{ReadVolatile, WriteVolatile};
41 use vm_migration::protocol::*;
42 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
43 use vmm_sys_util::eventfd::EventFd;
44 use vmm_sys_util::signal::unblock_signal;
45 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
46 
47 use crate::api::{
48     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
49     VmSendMigrationData, VmmPingResponse,
50 };
51 use crate::config::{
52     add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig,
53     UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
54 };
55 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
56 use crate::coredump::GuestDebuggable;
57 use crate::landlock::Landlock;
58 use crate::memory_manager::MemoryManager;
59 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
60 use crate::migration::get_vm_snapshot;
61 use crate::migration::{recv_vm_config, recv_vm_state};
62 use crate::seccomp_filters::{get_seccomp_filter, Thread};
63 use crate::vm::{Error as VmError, Vm, VmState};
64 
65 mod acpi;
66 pub mod api;
67 mod clone3;
68 pub mod config;
69 pub mod console_devices;
70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
71 mod coredump;
72 pub mod cpu;
73 pub mod device_manager;
74 pub mod device_tree;
75 #[cfg(feature = "guest_debug")]
76 mod gdb;
77 #[cfg(feature = "igvm")]
78 mod igvm;
79 pub mod interrupt;
80 pub mod landlock;
81 pub mod memory_manager;
82 pub mod migration;
83 mod pci_segment;
84 pub mod seccomp_filters;
85 mod serial_manager;
86 mod sigwinch_listener;
87 pub mod vm;
88 pub mod vm_config;
89 
// Crate-local shorthands for the `vm_memory` mmap-backed guest memory types,
// instantiated with `AtomicBitmap` as their bitmap type parameter.
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
92 
93 /// Errors associated with VMM management
94 #[derive(Debug, Error)]
95 pub enum Error {
96     /// API request receive error
97     #[error("Error receiving API request: {0}")]
98     ApiRequestRecv(#[source] RecvError),
99 
100     /// API response send error
101     #[error("Error sending API request: {0}")]
102     ApiResponseSend(#[source] SendError<ApiResponse>),
103 
104     /// Cannot bind to the UNIX domain socket path
105     #[error("Error binding to UNIX domain socket: {0}")]
106     Bind(#[source] io::Error),
107 
108     /// Cannot clone EventFd.
109     #[error("Error cloning EventFd: {0}")]
110     EventFdClone(#[source] io::Error),
111 
112     /// Cannot create EventFd.
113     #[error("Error creating EventFd: {0}")]
114     EventFdCreate(#[source] io::Error),
115 
116     /// Cannot read from EventFd.
117     #[error("Error reading from EventFd: {0}")]
118     EventFdRead(#[source] io::Error),
119 
120     /// Cannot create epoll context.
121     #[error("Error creating epoll context: {0}")]
122     Epoll(#[source] io::Error),
123 
124     /// Cannot create HTTP thread
125     #[error("Error spawning HTTP thread: {0}")]
126     HttpThreadSpawn(#[source] io::Error),
127 
128     /// Cannot create D-Bus thread
129     #[cfg(feature = "dbus_api")]
130     #[error("Error spawning D-Bus thread: {0}")]
131     DBusThreadSpawn(#[source] io::Error),
132 
133     /// Cannot start D-Bus session
134     #[cfg(feature = "dbus_api")]
135     #[error("Error starting D-Bus session: {0}")]
136     CreateDBusSession(#[source] zbus::Error),
137 
138     /// Cannot create `event-monitor` thread
139     #[error("Error spawning `event-monitor` thread: {0}")]
140     EventMonitorThreadSpawn(#[source] io::Error),
141 
142     /// Cannot handle the VM STDIN stream
143     #[error("Error handling VM stdin: {0:?}")]
144     Stdin(VmError),
145 
146     /// Cannot handle the VM pty stream
147     #[error("Error handling VM pty: {0:?}")]
148     Pty(VmError),
149 
150     /// Cannot reboot the VM
151     #[error("Error rebooting VM: {0:?}")]
152     VmReboot(VmError),
153 
154     /// Cannot create VMM thread
155     #[error("Error spawning VMM thread {0:?}")]
156     VmmThreadSpawn(#[source] io::Error),
157 
158     /// Cannot shut the VMM down
159     #[error("Error shutting down VMM: {0:?}")]
160     VmmShutdown(VmError),
161 
162     /// Cannot create seccomp filter
163     #[error("Error creating seccomp filter: {0}")]
164     CreateSeccompFilter(seccompiler::Error),
165 
166     /// Cannot apply seccomp filter
167     #[error("Error applying seccomp filter: {0}")]
168     ApplySeccompFilter(seccompiler::Error),
169 
170     /// Error activating virtio devices
171     #[error("Error activating virtio devices: {0:?}")]
172     ActivateVirtioDevices(VmError),
173 
174     /// Error creating API server
175     #[error("Error creating API server {0:?}")]
176     CreateApiServer(micro_http::ServerError),
177 
178     /// Error binding API server socket
179     #[error("Error creation API server's socket {0:?}")]
180     CreateApiServerSocket(#[source] io::Error),
181 
182     #[cfg(feature = "guest_debug")]
183     #[error("Failed to start the GDB thread: {0}")]
184     GdbThreadSpawn(io::Error),
185 
186     /// GDB request receive error
187     #[cfg(feature = "guest_debug")]
188     #[error("Error receiving GDB request: {0}")]
189     GdbRequestRecv(#[source] RecvError),
190 
191     /// GDB response send error
192     #[cfg(feature = "guest_debug")]
193     #[error("Error sending GDB request: {0}")]
194     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
195 
196     #[error("Cannot spawn a signal handler thread: {0}")]
197     SignalHandlerSpawn(#[source] io::Error),
198 
199     #[error("Failed to join on threads: {0:?}")]
200     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
201 
202     /// Cannot create Landlock object
203     #[error("Error creating landlock object: {0}")]
204     CreateLandlock(LandlockError),
205 
206     /// Cannot apply landlock based sandboxing
207     #[error("Error applying landlock: {0}")]
208     ApplyLandlock(LandlockError),
209 }
/// Shorthand for a `Result` whose error type is the VMM [`Error`] enum.
pub type Result<T> = result::Result<T, Error>;
211 
/// Token attached to every file descriptor registered through
/// `EpollContext::add_event`.
///
/// The numeric discriminant is what gets stored as the epoll event data, and
/// [`From<u64>`] turns that raw value back into a variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    /// Catch-all for any raw value that does not map to a variant above.
    Unknown,
}

impl From<u64> for EpollDispatch {
    /// Maps raw epoll event data back to its dispatch token; values without a
    /// dedicated variant become [`EpollDispatch::Unknown`].
    fn from(token: u64) -> Self {
        match token {
            0 => Self::Exit,
            1 => Self::Reset,
            2 => Self::Api,
            3 => Self::ActivateVirtioDevices,
            4 => Self::Debug,
            _ => Self::Unknown,
        }
    }
}
236 
237 pub struct EpollContext {
238     epoll_file: File,
239 }
240 
241 impl EpollContext {
242     pub fn new() -> result::Result<EpollContext, io::Error> {
243         let epoll_fd = epoll::create(true)?;
244         // Use 'File' to enforce closing on 'epoll_fd'
245         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
246         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
247 
248         Ok(EpollContext { epoll_file })
249     }
250 
251     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
252     where
253         T: AsRawFd,
254     {
255         let dispatch_index = token as u64;
256         epoll::ctl(
257             self.epoll_file.as_raw_fd(),
258             epoll::ControlOptions::EPOLL_CTL_ADD,
259             fd.as_raw_fd(),
260             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
261         )?;
262 
263         Ok(())
264     }
265 
266     #[cfg(fuzzing)]
267     pub fn add_event_custom<T>(
268         &mut self,
269         fd: &T,
270         id: u64,
271         evts: epoll::Events,
272     ) -> result::Result<(), io::Error>
273     where
274         T: AsRawFd,
275     {
276         epoll::ctl(
277             self.epoll_file.as_raw_fd(),
278             epoll::ControlOptions::EPOLL_CTL_ADD,
279             fd.as_raw_fd(),
280             epoll::Event::new(evts, id),
281         )?;
282 
283         Ok(())
284     }
285 }
286 
287 impl AsRawFd for EpollContext {
288     fn as_raw_fd(&self) -> RawFd {
289         self.epoll_file.as_raw_fd()
290     }
291 }
292 
293 pub struct PciDeviceInfo {
294     pub id: String,
295     pub bdf: PciBdf,
296 }
297 
298 impl Serialize for PciDeviceInfo {
299     fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
300     where
301         S: Serializer,
302     {
303         let bdf_str = self.bdf.to_string();
304 
305         // Serialize the structure.
306         let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
307         state.serialize_field("id", &self.id)?;
308         state.serialize_field("bdf", &bdf_str)?;
309         state.end()
310     }
311 }
312 
/// Returns the names of the Cargo features this crate was compiled with.
///
/// Only features enabled at build time appear in the result; the order is the
/// fixed order of the table below.
pub fn feature_list() -> Vec<String> {
    // Each entry is compiled in only when the matching feature is enabled.
    let enabled: &[&str] = &[
        #[cfg(feature = "dbus_api")]
        "dbus_api",
        #[cfg(feature = "dhat-heap")]
        "dhat-heap",
        #[cfg(feature = "guest_debug")]
        "guest_debug",
        #[cfg(feature = "igvm")]
        "igvm",
        #[cfg(feature = "io_uring")]
        "io_uring",
        #[cfg(feature = "kvm")]
        "kvm",
        #[cfg(feature = "mshv")]
        "mshv",
        #[cfg(feature = "sev_snp")]
        "sev_snp",
        #[cfg(feature = "tdx")]
        "tdx",
        #[cfg(feature = "tracing")]
        "tracing",
    ];

    enabled.iter().map(|&name| name.to_string()).collect()
}
337 
338 pub fn start_event_monitor_thread(
339     mut monitor: event_monitor::Monitor,
340     seccomp_action: &SeccompAction,
341     landlock_enable: bool,
342     hypervisor_type: hypervisor::HypervisorType,
343     exit_event: EventFd,
344 ) -> Result<thread::JoinHandle<Result<()>>> {
345     // Retrieve seccomp filter
346     let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
347         .map_err(Error::CreateSeccompFilter)?;
348 
349     thread::Builder::new()
350         .name("event-monitor".to_owned())
351         .spawn(move || {
352             // Apply seccomp filter
353             if !seccomp_filter.is_empty() {
354                 apply_filter(&seccomp_filter)
355                     .map_err(Error::ApplySeccompFilter)
356                     .map_err(|e| {
357                         error!("Error applying seccomp filter: {:?}", e);
358                         exit_event.write(1).ok();
359                         e
360                     })?;
361             }
362             if landlock_enable {
363                 Landlock::new()
364                     .map_err(Error::CreateLandlock)?
365                     .restrict_self()
366                     .map_err(Error::ApplyLandlock)
367                     .map_err(|e| {
368                         error!("Error applying landlock to event monitor thread: {:?}", e);
369                         exit_event.write(1).ok();
370                         e
371                     })?;
372             }
373 
374             std::panic::catch_unwind(AssertUnwindSafe(move || {
375                 while let Ok(event) = monitor.rx.recv() {
376                     let event = Arc::new(event);
377 
378                     if let Some(ref mut file) = monitor.file {
379                         file.write_all(event.as_bytes().as_ref()).ok();
380                         file.write_all(b"\n\n").ok();
381                     }
382 
383                     for tx in monitor.broadcast.iter() {
384                         tx.send(event.clone()).ok();
385                     }
386                 }
387             }))
388             .map_err(|_| {
389                 error!("`event-monitor` thread panicked");
390                 exit_event.write(1).ok();
391             })
392             .ok();
393 
394             Ok(())
395         })
396         .map_err(Error::EventMonitorThreadSpawn)
397 }
398 
/// Spawns the `vmm` thread and the API front-ends that talk to it.
///
/// The `vmm` thread applies its seccomp filter, builds a [`Vmm`], installs the
/// signal handler thread and then runs `Vmm::control_loop` with
/// `api_receiver` (and the GDB receiver when `guest_debug` is enabled).
///
/// Once that thread has been spawned, this function additionally starts:
/// * the D-Bus API thread, when the `dbus_api` feature is enabled and
///   `dbus_options` is `Some`;
/// * an HTTP API thread, bound to `http_path` if given, otherwise to
///   `http_fd` if given (`http_path` takes precedence when both are set);
/// * the `gdb` thread, when the `guest_debug` feature is enabled and
///   `debug_path` is `Some`.
///
/// # Errors
///
/// Fails if an `EventFd` cannot be cloned, the seccomp filter cannot be
/// created, or any of the threads cannot be started. Errors raised inside the
/// `vmm` thread itself are returned through
/// `VmmThreadHandle::thread_handle`.
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    landlock_enable: bool,
) -> Result<VmmThreadHandle> {
    // Everything the GDB stub needs is captured up front, because the
    // originals (`hypervisor`, `debug_event`, `vm_debug_event`) are moved into
    // the VMM thread closure below.
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    // `api_event` itself is moved into the VMM thread; the clone is handed to
    // the API front-end threads.
    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        // Shadow `exit_event` with a clone for the VMM thread so the original
        // stays available for the API threads started afterwards.
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler(landlock_enable)?;

                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started, we can start the dbus thread
    // and start serving HTTP requests
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            // Only the shutdown channels are kept; the first tuple element
            // returned by `start_dbus_thread` is discarded.
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    // At most one HTTP thread is started: a socket path wins over a raw fd.
    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else {
        None
    };

    // The GDB thread's join handle is not kept: the thread runs detached.
    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}
530 
/// Configuration payload exchanged during live migration.
///
/// On the receiving side it is deserialized from JSON in
/// `Vmm::vm_receive_config`.
#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    // VM configuration; becomes `Vmm::vm_config` on the receiving side.
    vm_config: Arc<Mutex<VmConfig>>,
    // CPUID entries handed to `vm_check_cpuid_compatibility` on the receiving
    // side (KVM on x86_64 only).
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    // Memory manager snapshot data passed to `MemoryManager::new` on the
    // receiving side.
    memory_manager_data: MemoryManagerSnapshotData,
}
538 
/// Version strings describing the running VMM.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    /// Builds a `VmmVersionInfo` holding owned copies of both strings.
    pub fn new(build_version: &str, version: &str) -> Self {
        let build_version = String::from(build_version);
        let version = String::from(version);

        Self {
            build_version,
            version,
        }
    }
}
553 
/// Handles returned by `start_vmm_thread`.
pub struct VmmThreadHandle {
    /// Join handle of the `vmm` thread; yields the result of its control loop.
    pub thread_handle: thread::JoinHandle<Result<()>>,
    /// Shutdown channels of the D-Bus API thread, if one was started.
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    /// Handle of the HTTP API thread, if one was started.
    pub http_api_handle: Option<HttpApiHandle>,
}
560 
/// State owned by the `vmm` thread.
pub struct Vmm {
    // Epoll instance on which the event fds below are registered (see `new`).
    epoll: EpollContext,
    // Registered as `EpollDispatch::Exit`; also written by the signal handler
    // thread on SIGTERM/SIGINT.
    exit_evt: EventFd,
    // Registered as `EpollDispatch::Reset`.
    reset_evt: EventFd,
    // Registered as `EpollDispatch::Api`.
    api_evt: EventFd,
    // Registered as `EpollDispatch::Debug`.
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    // Cloned and handed to the `Vm` as its debug event.
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    version: VmmVersionInfo,
    // The VM instance, if any; `None` right after construction.
    vm: Option<Vm>,
    // Configuration of the VM, if any; `None` right after construction.
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    // Registered as `EpollDispatch::ActivateVirtioDevices`.
    activate_evt: EventFd,
    // Handle of the signal iterator, set by `setup_signal_handler`.
    signals: Option<Handle>,
    // Join handles of helper threads (e.g. the signal handler thread).
    threads: Vec<thread::JoinHandle<()>>,
    // Terminal settings that the signal handler re-applies to stdout before
    // exiting the process when it cannot write `exit_evt`.
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    console_resize_pipe: Option<Arc<File>>,
    // Result of `pre_create_console_devices`, when it has been run.
    console_info: Option<ConsoleInfo>,
}
582 
583 impl Vmm {
    /// Signals watched by the signal handler thread (see `setup_signal_handler`).
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
585 
586     fn signal_handler(
587         mut signals: Signals,
588         original_termios_opt: Arc<Mutex<Option<termios>>>,
589         exit_evt: &EventFd,
590     ) {
591         for sig in &Self::HANDLED_SIGNALS {
592             unblock_signal(*sig).unwrap();
593         }
594 
595         for signal in signals.forever() {
596             match signal {
597                 SIGTERM | SIGINT => {
598                     if exit_evt.write(1).is_err() {
599                         // Resetting the terminal is usually done as the VMM exits
600                         if let Ok(lock) = original_termios_opt.lock() {
601                             if let Some(termios) = *lock {
602                                 // SAFETY: FFI call
603                                 let _ = unsafe {
604                                     tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
605                                 };
606                             }
607                         } else {
608                             warn!("Failed to lock original termios");
609                         }
610 
611                         std::process::exit(1);
612                     }
613                 }
614                 _ => (),
615             }
616         }
617     }
618 
619     fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
620         let signals = Signals::new(Self::HANDLED_SIGNALS);
621         match signals {
622             Ok(signals) => {
623                 self.signals = Some(signals.handle());
624                 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
625                 let original_termios_opt = Arc::clone(&self.original_termios_opt);
626 
627                 let signal_handler_seccomp_filter = get_seccomp_filter(
628                     &self.seccomp_action,
629                     Thread::SignalHandler,
630                     self.hypervisor.hypervisor_type(),
631                 )
632                 .map_err(Error::CreateSeccompFilter)?;
633                 self.threads.push(
634                     thread::Builder::new()
635                         .name("vmm_signal_handler".to_string())
636                         .spawn(move || {
637                             if !signal_handler_seccomp_filter.is_empty() {
638                                 if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
639                                     .map_err(Error::ApplySeccompFilter)
640                                 {
641                                     error!("Error applying seccomp filter: {:?}", e);
642                                     exit_evt.write(1).ok();
643                                     return;
644                                 }
645                             }
646                             if landlock_enable{
647                                 match Landlock::new() {
648                                     Ok(landlock) => {
649                                         let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
650                                             error!("Error applying Landlock to signal handler thread: {:?}", e);
651                                             exit_evt.write(1).ok();
652                                         });
653                                     }
654                                     Err(e) => {
655                                         error!("Error creating Landlock object: {:?}", e);
656                                         exit_evt.write(1).ok();
657                                     }
658                                 };
659                             }
660 
661                             std::panic::catch_unwind(AssertUnwindSafe(|| {
662                                 Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
663                             }))
664                             .map_err(|_| {
665                                 error!("vmm signal_handler thread panicked");
666                                 exit_evt.write(1).ok()
667                             })
668                             .ok();
669                         })
670                         .map_err(Error::SignalHandlerSpawn)?,
671                 );
672             }
673             Err(e) => error!("Signal not found {}", e),
674         }
675         Ok(())
676     }
677 
678     #[allow(clippy::too_many_arguments)]
679     fn new(
680         vmm_version: VmmVersionInfo,
681         api_evt: EventFd,
682         #[cfg(feature = "guest_debug")] debug_evt: EventFd,
683         #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
684         seccomp_action: SeccompAction,
685         hypervisor: Arc<dyn hypervisor::Hypervisor>,
686         exit_evt: EventFd,
687     ) -> Result<Self> {
688         let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
689         let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
690         let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
691 
692         epoll
693             .add_event(&exit_evt, EpollDispatch::Exit)
694             .map_err(Error::Epoll)?;
695 
696         epoll
697             .add_event(&reset_evt, EpollDispatch::Reset)
698             .map_err(Error::Epoll)?;
699 
700         epoll
701             .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
702             .map_err(Error::Epoll)?;
703 
704         epoll
705             .add_event(&api_evt, EpollDispatch::Api)
706             .map_err(Error::Epoll)?;
707 
708         #[cfg(feature = "guest_debug")]
709         epoll
710             .add_event(&debug_evt, EpollDispatch::Debug)
711             .map_err(Error::Epoll)?;
712 
713         Ok(Vmm {
714             epoll,
715             exit_evt,
716             reset_evt,
717             api_evt,
718             #[cfg(feature = "guest_debug")]
719             debug_evt,
720             #[cfg(feature = "guest_debug")]
721             vm_debug_evt,
722             version: vmm_version,
723             vm: None,
724             vm_config: None,
725             seccomp_action,
726             hypervisor,
727             activate_evt,
728             signals: None,
729             threads: vec![],
730             original_termios_opt: Arc::new(Mutex::new(None)),
731             console_resize_pipe: None,
732             console_info: None,
733         })
734     }
735 
    /// Handles the configuration step of an incoming migration.
    ///
    /// Reads `req.length()` bytes from `socket`, deserializes them as a JSON
    /// `VmMigrationConfig`, and then:
    /// 1. checks CPUID compatibility (KVM on x86_64 only);
    /// 2. stores the received VM config in `self.vm_config`;
    /// 3. pre-creates the console devices and stores them in
    ///    `self.console_info`;
    /// 4. applies Landlock when the received config enables it;
    /// 5. creates the hypervisor VM and a `MemoryManager` from the received
    ///    memory manager data (and `existing_memory_files`, if any);
    /// 6. writes an OK response to `socket`.
    ///
    /// Returns the newly created memory manager.
    ///
    /// # Errors
    ///
    /// Any failing step is reported as a `MigratableError`. NOTE(review): no
    /// error response is written to `socket` by this function on failure.
    fn vm_receive_config<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        existing_memory_files: Option<HashMap<u32, File>>,
    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
    where
        T: Read + Write,
    {
        // Read in config data along with memory manager data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;

        let vm_migration_config: VmMigrationConfig =
            serde_json::from_slice(&data).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
            })?;

        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(
            &vm_migration_config.vm_config,
            &vm_migration_config.common_cpuid,
        )?;

        // Keep a local handle on the config: `self.vm_config` takes ownership
        // of the one inside `vm_migration_config`.
        let config = vm_migration_config.vm_config.clone();
        self.vm_config = Some(vm_migration_config.vm_config);
        self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
        })?);

        // Landlock is applied only after the console devices have been
        // pre-created above.
        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
            })?;
        }

        let vm = Vm::create_hypervisor_vm(
            &self.hypervisor,
            #[cfg(feature = "tdx")]
            false,
            #[cfg(feature = "sev_snp")]
            false,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating hypervisor VM from snapshot: {:?}",
                e
            ))
        })?;

        let phys_bits =
            vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);

        // The memory manager is built from the snapshot data that came with
        // the config.
        let memory_manager = MemoryManager::new(
            vm,
            &config.lock().unwrap().memory.clone(),
            None,
            phys_bits,
            #[cfg(feature = "tdx")]
            false,
            Some(&vm_migration_config.memory_manager_data),
            existing_memory_files,
            #[cfg(target_arch = "x86_64")]
            None,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating MemoryManager from snapshot: {:?}",
                e
            ))
        })?;

        // Acknowledge the config step to the peer.
        Response::ok().write_to(socket)?;

        Ok(memory_manager)
    }
822 
    /// Handles the state step of an incoming migration.
    ///
    /// Reads `req.length()` bytes from `socket`, deserializes them as a JSON
    /// `Snapshot`, builds a `Vm` on top of the memory manager `mm` using that
    /// snapshot, restores it and stores it in `self.vm`. An OK response is
    /// written to `socket` on success.
    ///
    /// # Errors
    ///
    /// Any failing step is reported as a `MigratableError`. Only a failure of
    /// `vm.restore()` additionally sends an error response to the peer.
    ///
    /// # Panics
    ///
    /// Panics if `self.vm_config` is `None` or if the `mm` mutex is poisoned.
    fn vm_receive_state<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        mm: Arc<Mutex<MemoryManager>>,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + Write,
    {
        // Read in state data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;
        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
        })?;

        // The new VM gets its own clones of the VMM's event fds.
        let exit_evt = self.exit_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
        })?;
        let reset_evt = self.reset_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
        })?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
        })?;
        let activate_evt = self.activate_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
        })?;

        let timestamp = Instant::now();
        // Reuse the hypervisor VM already held by the memory manager.
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let mut vm = Vm::new_from_memory_manager(
            self.vm_config.clone().unwrap(),
            mm,
            hypervisor_vm,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            timestamp,
            self.console_info.clone(),
            self.console_resize_pipe.as_ref().map(Arc::clone),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
        })?;

        // Restore the VM from the received snapshot; on failure tell the peer
        // (best effort) before returning the error.
        vm.restore().map_err(|e| {
            Response::error().write_to(socket).ok();
            MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
        })?;
        self.vm = Some(vm);

        // Acknowledge the state step to the peer.
        Response::ok().write_to(socket)?;

        Ok(())
    }
890 
891     fn vm_receive_memory<T>(
892         &mut self,
893         req: &Request,
894         socket: &mut T,
895         memory_manager: &mut MemoryManager,
896     ) -> std::result::Result<(), MigratableError>
897     where
898         T: Read + ReadVolatile + Write,
899     {
900         // Read table
901         let table = MemoryRangeTable::read_from(socket, req.length())?;
902 
903         // And then read the memory itself
904         memory_manager
905             .receive_memory_regions(&table, socket)
906             .inspect_err(|_| {
907                 Response::error().write_to(socket).ok();
908             })?;
909         Response::ok().write_to(socket)?;
910         Ok(())
911     }
912 
913     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
914         url.strip_prefix("unix:")
915             .ok_or_else(|| {
916                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
917             })
918             .map(|s| s.into())
919     }
920 
921     // Returns true if there were dirty pages to send
922     fn vm_maybe_send_dirty_pages<T>(
923         vm: &mut Vm,
924         socket: &mut T,
925     ) -> result::Result<bool, MigratableError>
926     where
927         T: Read + Write + WriteVolatile,
928     {
929         // Send (dirty) memory table
930         let table = vm.dirty_log()?;
931 
932         // But if there are no regions go straight to pause
933         if table.regions().is_empty() {
934             return Ok(false);
935         }
936 
937         Request::memory(table.length()).write_to(socket).unwrap();
938         table.write_to(socket)?;
939         // And then the memory itself
940         vm.send_memory_regions(&table, socket)?;
941         Response::read_from(socket)?.ok_or_abandon(
942             socket,
943             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
944         )?;
945 
946         Ok(true)
947     }
948 
949     fn send_migration(
950         vm: &mut Vm,
951         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
952             dyn hypervisor::Hypervisor,
953         >,
954         send_data_migration: VmSendMigrationData,
955     ) -> result::Result<(), MigratableError> {
956         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
957         let mut socket = UnixStream::connect(path).map_err(|e| {
958             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
959         })?;
960 
961         // Start the migration
962         Request::start().write_to(&mut socket)?;
963         Response::read_from(&mut socket)?.ok_or_abandon(
964             &mut socket,
965             MigratableError::MigrateSend(anyhow!("Error starting migration")),
966         )?;
967 
968         // Send config
969         let vm_config = vm.get_config();
970         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
971         let common_cpuid = {
972             #[cfg(feature = "tdx")]
973             if vm_config.lock().unwrap().is_tdx_enabled() {
974                 return Err(MigratableError::MigrateSend(anyhow!(
975                     "Live Migration is not supported when TDX is enabled"
976                 )));
977             };
978 
979             let amx = vm_config.lock().unwrap().cpus.features.amx;
980             let phys_bits =
981                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
982             arch::generate_common_cpuid(
983                 &hypervisor,
984                 &arch::CpuidConfig {
985                     sgx_epc_sections: None,
986                     phys_bits,
987                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
988                     #[cfg(feature = "tdx")]
989                     tdx: false,
990                     amx,
991                 },
992             )
993             .map_err(|e| {
994                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
995             })?
996         };
997 
998         if send_data_migration.local {
999             vm.send_memory_fds(&mut socket)?;
1000         }
1001 
1002         let vm_migration_config = VmMigrationConfig {
1003             vm_config,
1004             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1005             common_cpuid,
1006             memory_manager_data: vm.memory_manager_data(),
1007         };
1008         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1009         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1010         socket
1011             .write_all(&config_data)
1012             .map_err(MigratableError::MigrateSocket)?;
1013         Response::read_from(&mut socket)?.ok_or_abandon(
1014             &mut socket,
1015             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1016         )?;
1017 
1018         // Let every Migratable object know about the migration being started.
1019         vm.start_migration()?;
1020 
1021         if send_data_migration.local {
1022             // Now pause VM
1023             vm.pause()?;
1024         } else {
1025             // Start logging dirty pages
1026             vm.start_dirty_log()?;
1027 
1028             // Send memory table
1029             let table = vm.memory_range_table()?;
1030             Request::memory(table.length())
1031                 .write_to(&mut socket)
1032                 .unwrap();
1033             table.write_to(&mut socket)?;
1034             // And then the memory itself
1035             vm.send_memory_regions(&table, &mut socket)?;
1036             Response::read_from(&mut socket)?.ok_or_abandon(
1037                 &mut socket,
1038                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1039             )?;
1040 
1041             // Try at most 5 passes of dirty memory sending
1042             const MAX_DIRTY_MIGRATIONS: usize = 5;
1043             for i in 0..MAX_DIRTY_MIGRATIONS {
1044                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1045                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1046                     break;
1047                 }
1048             }
1049 
1050             // Now pause VM
1051             vm.pause()?;
1052 
1053             // Send last batch of dirty pages
1054             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1055 
1056             // Stop logging dirty pages
1057             vm.stop_dirty_log()?;
1058         }
1059         // Capture snapshot and send it
1060         let vm_snapshot = vm.snapshot()?;
1061         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1062         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1063         socket
1064             .write_all(&snapshot_data)
1065             .map_err(MigratableError::MigrateSocket)?;
1066         Response::read_from(&mut socket)?.ok_or_abandon(
1067             &mut socket,
1068             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1069         )?;
1070         // Complete the migration
1071         Request::complete().write_to(&mut socket)?;
1072         Response::read_from(&mut socket)?.ok_or_abandon(
1073             &mut socket,
1074             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1075         )?;
1076 
1077         info!("Migration complete");
1078 
1079         // Let every Migratable object know about the migration being complete
1080         vm.complete_migration()
1081     }
1082 
1083     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1084     fn vm_check_cpuid_compatibility(
1085         &self,
1086         src_vm_config: &Arc<Mutex<VmConfig>>,
1087         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1088     ) -> result::Result<(), MigratableError> {
1089         #[cfg(feature = "tdx")]
1090         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1091             return Err(MigratableError::MigrateReceive(anyhow!(
1092                 "Live Migration is not supported when TDX is enabled"
1093             )));
1094         };
1095 
1096         // We check the `CPUID` compatibility of between the source vm and destination, which is
1097         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1098         let dest_cpuid = &{
1099             let vm_config = &src_vm_config.lock().unwrap();
1100 
1101             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1102             arch::generate_common_cpuid(
1103                 &self.hypervisor.clone(),
1104                 &arch::CpuidConfig {
1105                     sgx_epc_sections: None,
1106                     phys_bits,
1107                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1108                     #[cfg(feature = "tdx")]
1109                     tdx: false,
1110                     amx: vm_config.cpus.features.amx,
1111                 },
1112             )
1113             .map_err(|e| {
1114                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1115             })?
1116         };
1117         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1118             MigratableError::MigrateReceive(anyhow!(
1119                 "Error checking cpu feature compatibility': {:?}",
1120                 e
1121             ))
1122         })
1123     }
1124 
1125     fn control_loop(
1126         &mut self,
1127         api_receiver: Rc<Receiver<ApiRequest>>,
1128         #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
1129     ) -> Result<()> {
1130         const EPOLL_EVENTS_LEN: usize = 100;
1131 
1132         let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
1133         let epoll_fd = self.epoll.as_raw_fd();
1134 
1135         'outer: loop {
1136             let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
1137                 Ok(res) => res,
1138                 Err(e) => {
1139                     if e.kind() == io::ErrorKind::Interrupted {
1140                         // It's well defined from the epoll_wait() syscall
1141                         // documentation that the epoll loop can be interrupted
1142                         // before any of the requested events occurred or the
1143                         // timeout expired. In both those cases, epoll_wait()
1144                         // returns an error of type EINTR, but this should not
1145                         // be considered as a regular error. Instead it is more
1146                         // appropriate to retry, by calling into epoll_wait().
1147                         continue;
1148                     }
1149                     return Err(Error::Epoll(e));
1150                 }
1151             };
1152 
1153             for event in events.iter().take(num_events) {
1154                 let dispatch_event: EpollDispatch = event.data.into();
1155                 match dispatch_event {
1156                     EpollDispatch::Unknown => {
1157                         let event = event.data;
1158                         warn!("Unknown VMM loop event: {}", event);
1159                     }
1160                     EpollDispatch::Exit => {
1161                         info!("VM exit event");
1162                         // Consume the event.
1163                         self.exit_evt.read().map_err(Error::EventFdRead)?;
1164                         self.vmm_shutdown().map_err(Error::VmmShutdown)?;
1165 
1166                         break 'outer;
1167                     }
1168                     EpollDispatch::Reset => {
1169                         info!("VM reset event");
1170                         // Consume the event.
1171                         self.reset_evt.read().map_err(Error::EventFdRead)?;
1172                         self.vm_reboot().map_err(Error::VmReboot)?;
1173                     }
1174                     EpollDispatch::ActivateVirtioDevices => {
1175                         if let Some(ref vm) = self.vm {
1176                             let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
1177                             info!(
1178                                 "Trying to activate pending virtio devices: count = {}",
1179                                 count
1180                             );
1181                             vm.activate_virtio_devices()
1182                                 .map_err(Error::ActivateVirtioDevices)?;
1183                         }
1184                     }
1185                     EpollDispatch::Api => {
1186                         // Consume the events.
1187                         for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
1188                             // Read from the API receiver channel
1189                             let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;
1190 
1191                             if api_request(self)? {
1192                                 break 'outer;
1193                             }
1194                         }
1195                     }
1196                     #[cfg(feature = "guest_debug")]
1197                     EpollDispatch::Debug => {
1198                         // Consume the events.
1199                         for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
1200                             // Read from the API receiver channel
1201                             let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;
1202 
1203                             let response = if let Some(ref mut vm) = self.vm {
1204                                 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
1205                             } else {
1206                                 Err(VmError::VmNotRunning)
1207                             }
1208                             .map_err(gdb::Error::Vm);
1209 
1210                             gdb_request
1211                                 .sender
1212                                 .send(response)
1213                                 .map_err(Error::GdbResponseSend)?;
1214                         }
1215                     }
1216                     #[cfg(not(feature = "guest_debug"))]
1217                     EpollDispatch::Debug => {}
1218                 }
1219             }
1220         }
1221 
1222         // Trigger the termination of the signal_handler thread
1223         if let Some(signals) = self.signals.take() {
1224             signals.close();
1225         }
1226 
1227         // Wait for all the threads to finish
1228         for thread in self.threads.drain(..) {
1229             thread.join().map_err(Error::ThreadCleanup)?
1230         }
1231 
1232         Ok(())
1233     }
1234 }
1235 
1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
1237     vm_config.lock().unwrap().apply_landlock()?;
1238     Ok(())
1239 }
1240 
1241 impl RequestHandler for Vmm {
1242     fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> {
1243         // We only store the passed VM config.
1244         // The VM will be created when being asked to boot it.
1245         if self.vm_config.is_none() {
1246             self.vm_config = Some(Arc::new(Mutex::new(*config)));
1247             self.console_info =
1248                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1249 
1250             if self
1251                 .vm_config
1252                 .as_ref()
1253                 .unwrap()
1254                 .lock()
1255                 .unwrap()
1256                 .landlock_enable
1257             {
1258                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1259                     .map_err(VmError::ApplyLandlock)?;
1260             }
1261             Ok(())
1262         } else {
1263             Err(VmError::VmAlreadyCreated)
1264         }
1265     }
1266 
1267     fn vm_boot(&mut self) -> result::Result<(), VmError> {
1268         tracer::start();
1269         info!("Booting VM");
1270         event!("vm", "booting");
1271         let r = {
1272             trace_scoped!("vm_boot");
1273             // If we don't have a config, we cannot boot a VM.
1274             if self.vm_config.is_none() {
1275                 return Err(VmError::VmMissingConfig);
1276             };
1277 
1278             // console_info is set to None in vm_shutdown. re-populate here if empty
1279             if self.console_info.is_none() {
1280                 self.console_info =
1281                     Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1282             }
1283 
1284             // Create a new VM if we don't have one yet.
1285             if self.vm.is_none() {
1286                 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1287                 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1288                 #[cfg(feature = "guest_debug")]
1289                 let vm_debug_evt = self
1290                     .vm_debug_evt
1291                     .try_clone()
1292                     .map_err(VmError::EventFdClone)?;
1293                 let activate_evt = self
1294                     .activate_evt
1295                     .try_clone()
1296                     .map_err(VmError::EventFdClone)?;
1297 
1298                 if let Some(ref vm_config) = self.vm_config {
1299                     let vm = Vm::new(
1300                         Arc::clone(vm_config),
1301                         exit_evt,
1302                         reset_evt,
1303                         #[cfg(feature = "guest_debug")]
1304                         vm_debug_evt,
1305                         &self.seccomp_action,
1306                         self.hypervisor.clone(),
1307                         activate_evt,
1308                         self.console_info.clone(),
1309                         self.console_resize_pipe.as_ref().map(Arc::clone),
1310                         Arc::clone(&self.original_termios_opt),
1311                         None,
1312                         None,
1313                         None,
1314                     )?;
1315 
1316                     self.vm = Some(vm);
1317                 }
1318             }
1319 
1320             // Now we can boot the VM.
1321             if let Some(ref mut vm) = self.vm {
1322                 vm.boot()
1323             } else {
1324                 Err(VmError::VmNotCreated)
1325             }
1326         };
1327         tracer::end();
1328         if r.is_ok() {
1329             event!("vm", "booted");
1330         }
1331         r
1332     }
1333 
1334     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1335         if let Some(ref mut vm) = self.vm {
1336             vm.pause().map_err(VmError::Pause)
1337         } else {
1338             Err(VmError::VmNotRunning)
1339         }
1340     }
1341 
1342     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1343         if let Some(ref mut vm) = self.vm {
1344             vm.resume().map_err(VmError::Resume)
1345         } else {
1346             Err(VmError::VmNotRunning)
1347         }
1348     }
1349 
1350     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1351         if let Some(ref mut vm) = self.vm {
1352             // Drain console_info so that FDs are not reused
1353             let _ = self.console_info.take();
1354             vm.snapshot()
1355                 .map_err(VmError::Snapshot)
1356                 .and_then(|snapshot| {
1357                     vm.send(&snapshot, destination_url)
1358                         .map_err(VmError::SnapshotSend)
1359                 })
1360         } else {
1361             Err(VmError::VmNotRunning)
1362         }
1363     }
1364 
1365     fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
1366         if self.vm.is_some() || self.vm_config.is_some() {
1367             return Err(VmError::VmAlreadyCreated);
1368         }
1369 
1370         let source_url = restore_cfg.source_url.as_path().to_str();
1371         if source_url.is_none() {
1372             return Err(VmError::InvalidRestoreSourceUrl);
1373         }
1374         // Safe to unwrap as we checked it was Some(&str).
1375         let source_url = source_url.unwrap();
1376 
1377         let vm_config = Arc::new(Mutex::new(
1378             recv_vm_config(source_url).map_err(VmError::Restore)?,
1379         ));
1380         restore_cfg
1381             .validate(&vm_config.lock().unwrap().clone())
1382             .map_err(VmError::ConfigValidation)?;
1383 
1384         // Update VM's net configurations with new fds received for restore operation
1385         if let (Some(restored_nets), Some(vm_net_configs)) =
1386             (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
1387         {
1388             for net in restored_nets.iter() {
1389                 for net_config in vm_net_configs.iter_mut() {
1390                     // update only if the net dev is backed by FDs
1391                     if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
1392                         net_config.fds.clone_from(&net.fds);
1393                     }
1394                 }
1395             }
1396         }
1397 
1398         let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
1399         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1400         let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;
1401 
1402         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1403         self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
1404             .map_err(VmError::Restore)?;
1405 
1406         self.vm_config = Some(Arc::clone(&vm_config));
1407 
1408         // console_info is set to None in vm_snapshot. re-populate here if empty
1409         if self.console_info.is_none() {
1410             self.console_info =
1411                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1412         }
1413 
1414         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1415         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1416         #[cfg(feature = "guest_debug")]
1417         let debug_evt = self
1418             .vm_debug_evt
1419             .try_clone()
1420             .map_err(VmError::EventFdClone)?;
1421         let activate_evt = self
1422             .activate_evt
1423             .try_clone()
1424             .map_err(VmError::EventFdClone)?;
1425 
1426         let vm = Vm::new(
1427             vm_config,
1428             exit_evt,
1429             reset_evt,
1430             #[cfg(feature = "guest_debug")]
1431             debug_evt,
1432             &self.seccomp_action,
1433             self.hypervisor.clone(),
1434             activate_evt,
1435             self.console_info.clone(),
1436             self.console_resize_pipe.as_ref().map(Arc::clone),
1437             Arc::clone(&self.original_termios_opt),
1438             Some(snapshot),
1439             Some(source_url),
1440             Some(restore_cfg.prefault),
1441         )?;
1442         self.vm = Some(vm);
1443 
1444         if self
1445             .vm_config
1446             .as_ref()
1447             .unwrap()
1448             .lock()
1449             .unwrap()
1450             .landlock_enable
1451         {
1452             apply_landlock(self.vm_config.as_ref().unwrap().clone())
1453                 .map_err(VmError::ApplyLandlock)?;
1454         }
1455 
1456         // Now we can restore the rest of the VM.
1457         if let Some(ref mut vm) = self.vm {
1458             vm.restore()
1459         } else {
1460             Err(VmError::VmNotCreated)
1461         }
1462     }
1463 
1464     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1465     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1466         if let Some(ref mut vm) = self.vm {
1467             vm.coredump(destination_url).map_err(VmError::Coredump)
1468         } else {
1469             Err(VmError::VmNotRunning)
1470         }
1471     }
1472 
1473     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1474         let r = if let Some(ref mut vm) = self.vm.take() {
1475             // Drain console_info so that the FDs are not reused
1476             let _ = self.console_info.take();
1477             vm.shutdown()
1478         } else {
1479             Err(VmError::VmNotRunning)
1480         };
1481 
1482         if r.is_ok() {
1483             event!("vm", "shutdown");
1484         }
1485 
1486         r
1487     }
1488 
1489     fn vm_reboot(&mut self) -> result::Result<(), VmError> {
1490         event!("vm", "rebooting");
1491 
1492         // First we stop the current VM
1493         let config = if let Some(mut vm) = self.vm.take() {
1494             let config = vm.get_config();
1495             vm.shutdown()?;
1496             config
1497         } else {
1498             return Err(VmError::VmNotCreated);
1499         };
1500 
1501         // vm.shutdown() closes all the console devices, so set console_info to None
1502         // so that the closed FD #s are not reused.
1503         let _ = self.console_info.take();
1504 
1505         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1506         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1507         #[cfg(feature = "guest_debug")]
1508         let debug_evt = self
1509             .vm_debug_evt
1510             .try_clone()
1511             .map_err(VmError::EventFdClone)?;
1512         let activate_evt = self
1513             .activate_evt
1514             .try_clone()
1515             .map_err(VmError::EventFdClone)?;
1516 
1517         // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
1518         // an event sitting in the shared reset_evt. Without doing this we get very early reboots
1519         // during the boot process.
1520         if self.reset_evt.read().is_ok() {
1521             warn!("Spurious second reset event received. Ignoring.");
1522         }
1523 
1524         self.console_info =
1525             Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1526 
1527         // Then we create the new VM
1528         let mut vm = Vm::new(
1529             config,
1530             exit_evt,
1531             reset_evt,
1532             #[cfg(feature = "guest_debug")]
1533             debug_evt,
1534             &self.seccomp_action,
1535             self.hypervisor.clone(),
1536             activate_evt,
1537             self.console_info.clone(),
1538             self.console_resize_pipe.as_ref().map(Arc::clone),
1539             Arc::clone(&self.original_termios_opt),
1540             None,
1541             None,
1542             None,
1543         )?;
1544 
1545         // And we boot it
1546         vm.boot()?;
1547 
1548         self.vm = Some(vm);
1549 
1550         event!("vm", "rebooted");
1551 
1552         Ok(())
1553     }
1554 
1555     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1556         match &self.vm_config {
1557             Some(vm_config) => {
1558                 let state = match &self.vm {
1559                     Some(vm) => vm.get_state()?,
1560                     None => VmState::Created,
1561                 };
1562                 let config = vm_config.lock().unwrap().clone();
1563 
1564                 let mut memory_actual_size = config.memory.total_size();
1565                 if let Some(vm) = &self.vm {
1566                     memory_actual_size -= vm.balloon_size();
1567                 }
1568 
1569                 let device_tree = self
1570                     .vm
1571                     .as_ref()
1572                     .map(|vm| vm.device_tree().lock().unwrap().clone());
1573 
1574                 Ok(VmInfoResponse {
1575                     config: Box::new(config),
1576                     state,
1577                     memory_actual_size,
1578                     device_tree,
1579                 })
1580             }
1581             None => Err(VmError::VmNotCreated),
1582         }
1583     }
1584 
1585     fn vmm_ping(&self) -> VmmPingResponse {
1586         let VmmVersionInfo {
1587             build_version,
1588             version,
1589         } = self.version.clone();
1590 
1591         VmmPingResponse {
1592             build_version,
1593             version,
1594             pid: std::process::id() as i64,
1595             features: feature_list(),
1596         }
1597     }
1598 
1599     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1600         if self.vm_config.is_none() {
1601             return Ok(());
1602         }
1603 
1604         // If a VM is booted, we first try to shut it down.
1605         if self.vm.is_some() {
1606             self.vm_shutdown()?;
1607         }
1608 
1609         self.vm_config = None;
1610 
1611         event!("vm", "deleted");
1612 
1613         Ok(())
1614     }
1615 
1616     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1617         self.vm_delete()?;
1618         event!("vmm", "shutdown");
1619         Ok(())
1620     }
1621 
1622     fn vm_resize(
1623         &mut self,
1624         desired_vcpus: Option<u8>,
1625         desired_ram: Option<u64>,
1626         desired_balloon: Option<u64>,
1627     ) -> result::Result<(), VmError> {
1628         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1629 
1630         if let Some(ref mut vm) = self.vm {
1631             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1632                 error!("Error when resizing VM: {:?}", e);
1633                 Err(e)
1634             } else {
1635                 Ok(())
1636             }
1637         } else {
1638             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1639             if let Some(desired_vcpus) = desired_vcpus {
1640                 config.cpus.boot_vcpus = desired_vcpus;
1641             }
1642             if let Some(desired_ram) = desired_ram {
1643                 config.memory.size = desired_ram;
1644             }
1645             if let Some(desired_balloon) = desired_balloon {
1646                 if let Some(balloon_config) = &mut config.balloon {
1647                     balloon_config.size = desired_balloon;
1648                 }
1649             }
1650             Ok(())
1651         }
1652     }
1653 
1654     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1655         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1656 
1657         if let Some(ref mut vm) = self.vm {
1658             if let Err(e) = vm.resize_zone(id, desired_ram) {
1659                 error!("Error when resizing VM: {:?}", e);
1660                 Err(e)
1661             } else {
1662                 Ok(())
1663             }
1664         } else {
1665             // Update VmConfig by setting the new desired ram.
1666             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1667 
1668             if let Some(zones) = &mut memory_config.zones {
1669                 for zone in zones.iter_mut() {
1670                     if zone.id == id {
1671                         zone.size = desired_ram;
1672                         return Ok(());
1673                     }
1674                 }
1675             }
1676 
1677             error!("Could not find the memory zone {} for the resize", id);
1678             Err(VmError::ResizeZone)
1679         }
1680     }
1681 
1682     fn vm_add_device(
1683         &mut self,
1684         device_cfg: DeviceConfig,
1685     ) -> result::Result<Option<Vec<u8>>, VmError> {
1686         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1687 
1688         {
1689             // Validate the configuration change in a cloned configuration
1690             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1691             add_to_config(&mut config.devices, device_cfg.clone());
1692             config.validate().map_err(VmError::ConfigValidation)?;
1693         }
1694 
1695         if let Some(ref mut vm) = self.vm {
1696             let info = vm.add_device(device_cfg).map_err(|e| {
1697                 error!("Error when adding new device to the VM: {:?}", e);
1698                 e
1699             })?;
1700             serde_json::to_vec(&info)
1701                 .map(Some)
1702                 .map_err(VmError::SerializeJson)
1703         } else {
1704             // Update VmConfig by adding the new device.
1705             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1706             add_to_config(&mut config.devices, device_cfg);
1707             Ok(None)
1708         }
1709     }
1710 
1711     fn vm_add_user_device(
1712         &mut self,
1713         device_cfg: UserDeviceConfig,
1714     ) -> result::Result<Option<Vec<u8>>, VmError> {
1715         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1716 
1717         {
1718             // Validate the configuration change in a cloned configuration
1719             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1720             add_to_config(&mut config.user_devices, device_cfg.clone());
1721             config.validate().map_err(VmError::ConfigValidation)?;
1722         }
1723 
1724         if let Some(ref mut vm) = self.vm {
1725             let info = vm.add_user_device(device_cfg).map_err(|e| {
1726                 error!("Error when adding new user device to the VM: {:?}", e);
1727                 e
1728             })?;
1729             serde_json::to_vec(&info)
1730                 .map(Some)
1731                 .map_err(VmError::SerializeJson)
1732         } else {
1733             // Update VmConfig by adding the new device.
1734             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1735             add_to_config(&mut config.user_devices, device_cfg);
1736             Ok(None)
1737         }
1738     }
1739 
1740     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1741         if let Some(ref mut vm) = self.vm {
1742             if let Err(e) = vm.remove_device(id) {
1743                 error!("Error when removing device from the VM: {:?}", e);
1744                 Err(e)
1745             } else {
1746                 Ok(())
1747             }
1748         } else if let Some(ref config) = self.vm_config {
1749             let mut config = config.lock().unwrap();
1750             if config.remove_device(&id) {
1751                 Ok(())
1752             } else {
1753                 Err(VmError::NoDeviceToRemove(id))
1754             }
1755         } else {
1756             Err(VmError::VmNotCreated)
1757         }
1758     }
1759 
1760     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1761         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1762 
1763         {
1764             // Validate the configuration change in a cloned configuration
1765             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1766             add_to_config(&mut config.disks, disk_cfg.clone());
1767             config.validate().map_err(VmError::ConfigValidation)?;
1768         }
1769 
1770         if let Some(ref mut vm) = self.vm {
1771             let info = vm.add_disk(disk_cfg).map_err(|e| {
1772                 error!("Error when adding new disk to the VM: {:?}", e);
1773                 e
1774             })?;
1775             serde_json::to_vec(&info)
1776                 .map(Some)
1777                 .map_err(VmError::SerializeJson)
1778         } else {
1779             // Update VmConfig by adding the new device.
1780             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1781             add_to_config(&mut config.disks, disk_cfg);
1782             Ok(None)
1783         }
1784     }
1785 
1786     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1787         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1788 
1789         {
1790             // Validate the configuration change in a cloned configuration
1791             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1792             add_to_config(&mut config.fs, fs_cfg.clone());
1793             config.validate().map_err(VmError::ConfigValidation)?;
1794         }
1795 
1796         if let Some(ref mut vm) = self.vm {
1797             let info = vm.add_fs(fs_cfg).map_err(|e| {
1798                 error!("Error when adding new fs to the VM: {:?}", e);
1799                 e
1800             })?;
1801             serde_json::to_vec(&info)
1802                 .map(Some)
1803                 .map_err(VmError::SerializeJson)
1804         } else {
1805             // Update VmConfig by adding the new device.
1806             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1807             add_to_config(&mut config.fs, fs_cfg);
1808             Ok(None)
1809         }
1810     }
1811 
1812     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1813         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1814 
1815         {
1816             // Validate the configuration change in a cloned configuration
1817             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1818             add_to_config(&mut config.pmem, pmem_cfg.clone());
1819             config.validate().map_err(VmError::ConfigValidation)?;
1820         }
1821 
1822         if let Some(ref mut vm) = self.vm {
1823             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1824                 error!("Error when adding new pmem device to the VM: {:?}", e);
1825                 e
1826             })?;
1827             serde_json::to_vec(&info)
1828                 .map(Some)
1829                 .map_err(VmError::SerializeJson)
1830         } else {
1831             // Update VmConfig by adding the new device.
1832             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1833             add_to_config(&mut config.pmem, pmem_cfg);
1834             Ok(None)
1835         }
1836     }
1837 
1838     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1839         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1840 
1841         {
1842             // Validate the configuration change in a cloned configuration
1843             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1844             add_to_config(&mut config.net, net_cfg.clone());
1845             config.validate().map_err(VmError::ConfigValidation)?;
1846         }
1847 
1848         if let Some(ref mut vm) = self.vm {
1849             let info = vm.add_net(net_cfg).map_err(|e| {
1850                 error!("Error when adding new network device to the VM: {:?}", e);
1851                 e
1852             })?;
1853             serde_json::to_vec(&info)
1854                 .map(Some)
1855                 .map_err(VmError::SerializeJson)
1856         } else {
1857             // Update VmConfig by adding the new device.
1858             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1859             add_to_config(&mut config.net, net_cfg);
1860             Ok(None)
1861         }
1862     }
1863 
1864     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1865         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1866 
1867         {
1868             // Validate the configuration change in a cloned configuration
1869             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1870             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1871             config.validate().map_err(VmError::ConfigValidation)?;
1872         }
1873 
1874         if let Some(ref mut vm) = self.vm {
1875             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1876                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1877                 e
1878             })?;
1879             serde_json::to_vec(&info)
1880                 .map(Some)
1881                 .map_err(VmError::SerializeJson)
1882         } else {
1883             // Update VmConfig by adding the new device.
1884             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1885             add_to_config(&mut config.vdpa, vdpa_cfg);
1886             Ok(None)
1887         }
1888     }
1889 
1890     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1891         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1892 
1893         {
1894             // Validate the configuration change in a cloned configuration
1895             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1896 
1897             if config.vsock.is_some() {
1898                 return Err(VmError::TooManyVsockDevices);
1899             }
1900 
1901             config.vsock = Some(vsock_cfg.clone());
1902             config.validate().map_err(VmError::ConfigValidation)?;
1903         }
1904 
1905         if let Some(ref mut vm) = self.vm {
1906             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1907                 error!("Error when adding new vsock device to the VM: {:?}", e);
1908                 e
1909             })?;
1910             serde_json::to_vec(&info)
1911                 .map(Some)
1912                 .map_err(VmError::SerializeJson)
1913         } else {
1914             // Update VmConfig by adding the new device.
1915             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1916             config.vsock = Some(vsock_cfg);
1917             Ok(None)
1918         }
1919     }
1920 
1921     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1922         if let Some(ref mut vm) = self.vm {
1923             let info = vm.counters().map_err(|e| {
1924                 error!("Error when getting counters from the VM: {:?}", e);
1925                 e
1926             })?;
1927             serde_json::to_vec(&info)
1928                 .map(Some)
1929                 .map_err(VmError::SerializeJson)
1930         } else {
1931             Err(VmError::VmNotRunning)
1932         }
1933     }
1934 
1935     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1936         if let Some(ref mut vm) = self.vm {
1937             vm.power_button()
1938         } else {
1939             Err(VmError::VmNotRunning)
1940         }
1941     }
1942 
1943     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1944         if let Some(ref mut vm) = self.vm {
1945             vm.nmi()
1946         } else {
1947             Err(VmError::VmNotRunning)
1948         }
1949     }
1950 
1951     fn vm_receive_migration(
1952         &mut self,
1953         receive_data_migration: VmReceiveMigrationData,
1954     ) -> result::Result<(), MigratableError> {
1955         info!(
1956             "Receiving migration: receiver_url = {}",
1957             receive_data_migration.receiver_url
1958         );
1959 
1960         let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
1961         let listener = UnixListener::bind(&path).map_err(|e| {
1962             MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
1963         })?;
1964         let (mut socket, _addr) = listener.accept().map_err(|e| {
1965             MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
1966         })?;
1967         std::fs::remove_file(&path).map_err(|e| {
1968             MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
1969         })?;
1970 
1971         let mut started = false;
1972         let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
1973         let mut existing_memory_files = None;
1974         loop {
1975             let req = Request::read_from(&mut socket)?;
1976             match req.command() {
1977                 Command::Invalid => info!("Invalid Command Received"),
1978                 Command::Start => {
1979                     info!("Start Command Received");
1980                     started = true;
1981 
1982                     Response::ok().write_to(&mut socket)?;
1983                 }
1984                 Command::Config => {
1985                     info!("Config Command Received");
1986 
1987                     if !started {
1988                         warn!("Migration not started yet");
1989                         Response::error().write_to(&mut socket)?;
1990                         continue;
1991                     }
1992                     memory_manager = Some(self.vm_receive_config(
1993                         &req,
1994                         &mut socket,
1995                         existing_memory_files.take(),
1996                     )?);
1997                 }
1998                 Command::State => {
1999                     info!("State Command Received");
2000 
2001                     if !started {
2002                         warn!("Migration not started yet");
2003                         Response::error().write_to(&mut socket)?;
2004                         continue;
2005                     }
2006                     if let Some(mm) = memory_manager.take() {
2007                         self.vm_receive_state(&req, &mut socket, mm)?;
2008                     } else {
2009                         warn!("Configuration not sent yet");
2010                         Response::error().write_to(&mut socket)?;
2011                     }
2012                 }
2013                 Command::Memory => {
2014                     info!("Memory Command Received");
2015 
2016                     if !started {
2017                         warn!("Migration not started yet");
2018                         Response::error().write_to(&mut socket)?;
2019                         continue;
2020                     }
2021                     if let Some(mm) = memory_manager.as_ref() {
2022                         self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
2023                     } else {
2024                         warn!("Configuration not sent yet");
2025                         Response::error().write_to(&mut socket)?;
2026                     }
2027                 }
2028                 Command::MemoryFd => {
2029                     info!("MemoryFd Command Received");
2030 
2031                     if !started {
2032                         warn!("Migration not started yet");
2033                         Response::error().write_to(&mut socket)?;
2034                         continue;
2035                     }
2036 
2037                     let mut buf = [0u8; 4];
2038                     let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
2039                         MigratableError::MigrateReceive(anyhow!(
2040                             "Error receiving slot from socket: {}",
2041                             e
2042                         ))
2043                     })?;
2044 
2045                     if existing_memory_files.is_none() {
2046                         existing_memory_files = Some(HashMap::default())
2047                     }
2048 
2049                     if let Some(ref mut existing_memory_files) = existing_memory_files {
2050                         let slot = u32::from_le_bytes(buf);
2051                         existing_memory_files.insert(slot, file.unwrap());
2052                     }
2053 
2054                     Response::ok().write_to(&mut socket)?;
2055                 }
2056                 Command::Complete => {
2057                     info!("Complete Command Received");
2058                     if let Some(ref mut vm) = self.vm.as_mut() {
2059                         vm.resume()?;
2060                         Response::ok().write_to(&mut socket)?;
2061                     } else {
2062                         warn!("VM not created yet");
2063                         Response::error().write_to(&mut socket)?;
2064                     }
2065                     break;
2066                 }
2067                 Command::Abandon => {
2068                     info!("Abandon Command Received");
2069                     self.vm = None;
2070                     self.vm_config = None;
2071                     Response::ok().write_to(&mut socket).ok();
2072                     break;
2073                 }
2074             }
2075         }
2076 
2077         Ok(())
2078     }
2079 
2080     fn vm_send_migration(
2081         &mut self,
2082         send_data_migration: VmSendMigrationData,
2083     ) -> result::Result<(), MigratableError> {
2084         info!(
2085             "Sending migration: destination_url = {}, local = {}",
2086             send_data_migration.destination_url, send_data_migration.local
2087         );
2088 
2089         if !self
2090             .vm_config
2091             .as_ref()
2092             .unwrap()
2093             .lock()
2094             .unwrap()
2095             .backed_by_shared_memory()
2096             && send_data_migration.local
2097         {
2098             return Err(MigratableError::MigrateSend(anyhow!(
2099                 "Local migration requires shared memory or hugepages enabled"
2100             )));
2101         }
2102 
2103         if let Some(vm) = self.vm.as_mut() {
2104             Self::send_migration(
2105                 vm,
2106                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2107                 self.hypervisor.clone(),
2108                 send_data_migration,
2109             )
2110             .map_err(|migration_err| {
2111                 error!("Migration failed: {:?}", migration_err);
2112 
2113                 // Stop logging dirty pages
2114                 if let Err(e) = vm.stop_dirty_log() {
2115                     return e;
2116                 }
2117 
2118                 if vm.get_state().unwrap() == VmState::Paused {
2119                     if let Err(e) = vm.resume() {
2120                         return e;
2121                     }
2122                 }
2123 
2124                 migration_err
2125             })?;
2126 
2127             // Shutdown the VM after the migration succeeded
2128             self.exit_evt.write(1).map_err(|e| {
2129                 MigratableError::MigrateSend(anyhow!(
2130                     "Failed shutting down the VM after migration: {:?}",
2131                     e
2132                 ))
2133             })
2134         } else {
2135             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2136         }
2137     }
2138 }
2139 
// String identifiers for the CPU, memory and device managers.
// NOTE(review): presumably the keys of the per-manager sections inside a VM
// snapshot; the code that uses them is outside this part of the file, so
// confirm against the snapshot/restore paths.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2143 
2144 #[cfg(test)]
2145 mod unit_tests {
2146     use config::{
2147         ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig,
2148         RngConfig,
2149     };
2150 
2151     use super::*;
2152     #[cfg(target_arch = "x86_64")]
2153     use crate::config::DebugConsoleConfig;
2154 
2155     fn create_dummy_vmm() -> Vmm {
2156         Vmm::new(
2157             VmmVersionInfo::new("dummy", "dummy"),
2158             EventFd::new(EFD_NONBLOCK).unwrap(),
2159             #[cfg(feature = "guest_debug")]
2160             EventFd::new(EFD_NONBLOCK).unwrap(),
2161             #[cfg(feature = "guest_debug")]
2162             EventFd::new(EFD_NONBLOCK).unwrap(),
2163             SeccompAction::Allow,
2164             hypervisor::new().unwrap(),
2165             EventFd::new(EFD_NONBLOCK).unwrap(),
2166         )
2167         .unwrap()
2168     }
2169 
2170     fn create_dummy_vm_config() -> Box<VmConfig> {
2171         Box::new(VmConfig {
2172             cpus: CpusConfig {
2173                 boot_vcpus: 1,
2174                 max_vcpus: 1,
2175                 topology: None,
2176                 kvm_hyperv: false,
2177                 max_phys_bits: 46,
2178                 affinity: None,
2179                 features: config::CpuFeatures::default(),
2180             },
2181             memory: MemoryConfig {
2182                 size: 536_870_912,
2183                 mergeable: false,
2184                 hotplug_method: HotplugMethod::Acpi,
2185                 hotplug_size: None,
2186                 hotplugged_size: None,
2187                 shared: true,
2188                 hugepages: false,
2189                 hugepage_size: None,
2190                 prefault: false,
2191                 zones: None,
2192                 thp: true,
2193             },
2194             payload: Some(PayloadConfig {
2195                 kernel: Some(PathBuf::from("/path/to/kernel")),
2196                 firmware: None,
2197                 cmdline: None,
2198                 initramfs: None,
2199                 #[cfg(feature = "igvm")]
2200                 igvm: None,
2201                 #[cfg(feature = "sev_snp")]
2202                 host_data: None,
2203             }),
2204             rate_limit_groups: None,
2205             disks: None,
2206             net: None,
2207             rng: RngConfig {
2208                 src: PathBuf::from("/dev/urandom"),
2209                 iommu: false,
2210             },
2211             balloon: None,
2212             fs: None,
2213             pmem: None,
2214             serial: ConsoleConfig {
2215                 file: None,
2216                 mode: ConsoleOutputMode::Null,
2217                 iommu: false,
2218                 socket: None,
2219             },
2220             console: ConsoleConfig {
2221                 file: None,
2222                 mode: ConsoleOutputMode::Tty,
2223                 iommu: false,
2224                 socket: None,
2225             },
2226             #[cfg(target_arch = "x86_64")]
2227             debug_console: DebugConsoleConfig::default(),
2228             devices: None,
2229             user_devices: None,
2230             vdpa: None,
2231             vsock: None,
2232             #[cfg(feature = "pvmemcontrol")]
2233             pvmemcontrol: None,
2234             pvpanic: false,
2235             iommu: false,
2236             #[cfg(target_arch = "x86_64")]
2237             sgx_epc: None,
2238             numa: None,
2239             watchdog: false,
2240             #[cfg(feature = "guest_debug")]
2241             gdb: false,
2242             pci_segments: None,
2243             platform: None,
2244             tpm: None,
2245             preserved_fds: None,
2246             landlock_enable: false,
2247             landlock_rules: None,
2248         })
2249     }
2250 
2251     #[test]
2252     fn test_vmm_vm_create() {
2253         let mut vmm = create_dummy_vmm();
2254         let config = create_dummy_vm_config();
2255 
2256         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2257         assert!(matches!(
2258             vmm.vm_create(config),
2259             Err(VmError::VmAlreadyCreated)
2260         ));
2261     }
2262 
2263     #[test]
2264     fn test_vmm_vm_cold_add_device() {
2265         let mut vmm = create_dummy_vmm();
2266         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2267 
2268         assert!(matches!(
2269             vmm.vm_add_device(device_config.clone()),
2270             Err(VmError::VmNotCreated)
2271         ));
2272 
2273         let _ = vmm.vm_create(create_dummy_vm_config());
2274         assert!(vmm
2275             .vm_config
2276             .as_ref()
2277             .unwrap()
2278             .lock()
2279             .unwrap()
2280             .devices
2281             .is_none());
2282 
2283         let result = vmm.vm_add_device(device_config.clone());
2284         assert!(result.is_ok());
2285         assert!(result.unwrap().is_none());
2286         assert_eq!(
2287             vmm.vm_config
2288                 .as_ref()
2289                 .unwrap()
2290                 .lock()
2291                 .unwrap()
2292                 .devices
2293                 .clone()
2294                 .unwrap()
2295                 .len(),
2296             1
2297         );
2298         assert_eq!(
2299             vmm.vm_config
2300                 .as_ref()
2301                 .unwrap()
2302                 .lock()
2303                 .unwrap()
2304                 .devices
2305                 .clone()
2306                 .unwrap()[0],
2307             device_config
2308         );
2309     }
2310 
2311     #[test]
2312     fn test_vmm_vm_cold_add_user_device() {
2313         let mut vmm = create_dummy_vmm();
2314         let user_device_config =
2315             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2316 
2317         assert!(matches!(
2318             vmm.vm_add_user_device(user_device_config.clone()),
2319             Err(VmError::VmNotCreated)
2320         ));
2321 
2322         let _ = vmm.vm_create(create_dummy_vm_config());
2323         assert!(vmm
2324             .vm_config
2325             .as_ref()
2326             .unwrap()
2327             .lock()
2328             .unwrap()
2329             .user_devices
2330             .is_none());
2331 
2332         let result = vmm.vm_add_user_device(user_device_config.clone());
2333         assert!(result.is_ok());
2334         assert!(result.unwrap().is_none());
2335         assert_eq!(
2336             vmm.vm_config
2337                 .as_ref()
2338                 .unwrap()
2339                 .lock()
2340                 .unwrap()
2341                 .user_devices
2342                 .clone()
2343                 .unwrap()
2344                 .len(),
2345             1
2346         );
2347         assert_eq!(
2348             vmm.vm_config
2349                 .as_ref()
2350                 .unwrap()
2351                 .lock()
2352                 .unwrap()
2353                 .user_devices
2354                 .clone()
2355                 .unwrap()[0],
2356             user_device_config
2357         );
2358     }
2359 
2360     #[test]
2361     fn test_vmm_vm_cold_add_disk() {
2362         let mut vmm = create_dummy_vmm();
2363         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2364 
2365         assert!(matches!(
2366             vmm.vm_add_disk(disk_config.clone()),
2367             Err(VmError::VmNotCreated)
2368         ));
2369 
2370         let _ = vmm.vm_create(create_dummy_vm_config());
2371         assert!(vmm
2372             .vm_config
2373             .as_ref()
2374             .unwrap()
2375             .lock()
2376             .unwrap()
2377             .disks
2378             .is_none());
2379 
2380         let result = vmm.vm_add_disk(disk_config.clone());
2381         assert!(result.is_ok());
2382         assert!(result.unwrap().is_none());
2383         assert_eq!(
2384             vmm.vm_config
2385                 .as_ref()
2386                 .unwrap()
2387                 .lock()
2388                 .unwrap()
2389                 .disks
2390                 .clone()
2391                 .unwrap()
2392                 .len(),
2393             1
2394         );
2395         assert_eq!(
2396             vmm.vm_config
2397                 .as_ref()
2398                 .unwrap()
2399                 .lock()
2400                 .unwrap()
2401                 .disks
2402                 .clone()
2403                 .unwrap()[0],
2404             disk_config
2405         );
2406     }
2407 
2408     #[test]
2409     fn test_vmm_vm_cold_add_fs() {
2410         let mut vmm = create_dummy_vmm();
2411         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2412 
2413         assert!(matches!(
2414             vmm.vm_add_fs(fs_config.clone()),
2415             Err(VmError::VmNotCreated)
2416         ));
2417 
2418         let _ = vmm.vm_create(create_dummy_vm_config());
2419         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2420 
2421         let result = vmm.vm_add_fs(fs_config.clone());
2422         assert!(result.is_ok());
2423         assert!(result.unwrap().is_none());
2424         assert_eq!(
2425             vmm.vm_config
2426                 .as_ref()
2427                 .unwrap()
2428                 .lock()
2429                 .unwrap()
2430                 .fs
2431                 .clone()
2432                 .unwrap()
2433                 .len(),
2434             1
2435         );
2436         assert_eq!(
2437             vmm.vm_config
2438                 .as_ref()
2439                 .unwrap()
2440                 .lock()
2441                 .unwrap()
2442                 .fs
2443                 .clone()
2444                 .unwrap()[0],
2445             fs_config
2446         );
2447     }
2448 
2449     #[test]
2450     fn test_vmm_vm_cold_add_pmem() {
2451         let mut vmm = create_dummy_vmm();
2452         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2453 
2454         assert!(matches!(
2455             vmm.vm_add_pmem(pmem_config.clone()),
2456             Err(VmError::VmNotCreated)
2457         ));
2458 
2459         let _ = vmm.vm_create(create_dummy_vm_config());
2460         assert!(vmm
2461             .vm_config
2462             .as_ref()
2463             .unwrap()
2464             .lock()
2465             .unwrap()
2466             .pmem
2467             .is_none());
2468 
2469         let result = vmm.vm_add_pmem(pmem_config.clone());
2470         assert!(result.is_ok());
2471         assert!(result.unwrap().is_none());
2472         assert_eq!(
2473             vmm.vm_config
2474                 .as_ref()
2475                 .unwrap()
2476                 .lock()
2477                 .unwrap()
2478                 .pmem
2479                 .clone()
2480                 .unwrap()
2481                 .len(),
2482             1
2483         );
2484         assert_eq!(
2485             vmm.vm_config
2486                 .as_ref()
2487                 .unwrap()
2488                 .lock()
2489                 .unwrap()
2490                 .pmem
2491                 .clone()
2492                 .unwrap()[0],
2493             pmem_config
2494         );
2495     }
2496 
2497     #[test]
2498     fn test_vmm_vm_cold_add_net() {
2499         let mut vmm = create_dummy_vmm();
2500         let net_config = NetConfig::parse(
2501             "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
2502         )
2503         .unwrap();
2504 
2505         assert!(matches!(
2506             vmm.vm_add_net(net_config.clone()),
2507             Err(VmError::VmNotCreated)
2508         ));
2509 
2510         let _ = vmm.vm_create(create_dummy_vm_config());
2511         assert!(vmm
2512             .vm_config
2513             .as_ref()
2514             .unwrap()
2515             .lock()
2516             .unwrap()
2517             .net
2518             .is_none());
2519 
2520         let result = vmm.vm_add_net(net_config.clone());
2521         assert!(result.is_ok());
2522         assert!(result.unwrap().is_none());
2523         assert_eq!(
2524             vmm.vm_config
2525                 .as_ref()
2526                 .unwrap()
2527                 .lock()
2528                 .unwrap()
2529                 .net
2530                 .clone()
2531                 .unwrap()
2532                 .len(),
2533             1
2534         );
2535         assert_eq!(
2536             vmm.vm_config
2537                 .as_ref()
2538                 .unwrap()
2539                 .lock()
2540                 .unwrap()
2541                 .net
2542                 .clone()
2543                 .unwrap()[0],
2544             net_config
2545         );
2546     }
2547 
2548     #[test]
2549     fn test_vmm_vm_cold_add_vdpa() {
2550         let mut vmm = create_dummy_vmm();
2551         let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();
2552 
2553         assert!(matches!(
2554             vmm.vm_add_vdpa(vdpa_config.clone()),
2555             Err(VmError::VmNotCreated)
2556         ));
2557 
2558         let _ = vmm.vm_create(create_dummy_vm_config());
2559         assert!(vmm
2560             .vm_config
2561             .as_ref()
2562             .unwrap()
2563             .lock()
2564             .unwrap()
2565             .vdpa
2566             .is_none());
2567 
2568         let result = vmm.vm_add_vdpa(vdpa_config.clone());
2569         assert!(result.is_ok());
2570         assert!(result.unwrap().is_none());
2571         assert_eq!(
2572             vmm.vm_config
2573                 .as_ref()
2574                 .unwrap()
2575                 .lock()
2576                 .unwrap()
2577                 .vdpa
2578                 .clone()
2579                 .unwrap()
2580                 .len(),
2581             1
2582         );
2583         assert_eq!(
2584             vmm.vm_config
2585                 .as_ref()
2586                 .unwrap()
2587                 .lock()
2588                 .unwrap()
2589                 .vdpa
2590                 .clone()
2591                 .unwrap()[0],
2592             vdpa_config
2593         );
2594     }
2595 
2596     #[test]
2597     fn test_vmm_vm_cold_add_vsock() {
2598         let mut vmm = create_dummy_vmm();
2599         let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();
2600 
2601         assert!(matches!(
2602             vmm.vm_add_vsock(vsock_config.clone()),
2603             Err(VmError::VmNotCreated)
2604         ));
2605 
2606         let _ = vmm.vm_create(create_dummy_vm_config());
2607         assert!(vmm
2608             .vm_config
2609             .as_ref()
2610             .unwrap()
2611             .lock()
2612             .unwrap()
2613             .vsock
2614             .is_none());
2615 
2616         let result = vmm.vm_add_vsock(vsock_config.clone());
2617         assert!(result.is_ok());
2618         assert!(result.unwrap().is_none());
2619         assert_eq!(
2620             vmm.vm_config
2621                 .as_ref()
2622                 .unwrap()
2623                 .lock()
2624                 .unwrap()
2625                 .vsock
2626                 .clone()
2627                 .unwrap(),
2628             vsock_config
2629         );
2630     }
2631 }
2632