xref: /cloud-hypervisor/vmm/src/lib.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use std::collections::HashMap;
12 use std::fs::File;
13 use std::io;
14 use std::io::{stdout, Read, Write};
15 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
16 use std::os::unix::net::UnixListener;
17 use std::os::unix::net::UnixStream;
18 use std::panic::AssertUnwindSafe;
19 use std::path::PathBuf;
20 use std::rc::Rc;
21 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
22 use std::sync::{Arc, Mutex};
23 use std::time::Instant;
24 use std::{result, thread};
25 
26 use anyhow::anyhow;
27 #[cfg(feature = "dbus_api")]
28 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
29 use api::http::HttpApiHandle;
30 use console_devices::{pre_create_console_devices, ConsoleInfo};
31 use landlock::LandlockError;
32 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
33 use memory_manager::MemoryManagerSnapshotData;
34 use pci::PciBdf;
35 use seccompiler::{apply_filter, SeccompAction};
36 use serde::ser::{SerializeStruct, Serializer};
37 use serde::{Deserialize, Serialize};
38 use signal_hook::iterator::{Handle, Signals};
39 use thiserror::Error;
40 use tracer::trace_scoped;
41 use vm_memory::bitmap::AtomicBitmap;
42 use vm_memory::{ReadVolatile, WriteVolatile};
43 use vm_migration::{protocol::*, Migratable};
44 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
45 use vmm_sys_util::eventfd::EventFd;
46 use vmm_sys_util::signal::unblock_signal;
47 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
48 
49 use crate::api::{
50     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
51     VmSendMigrationData, VmmPingResponse,
52 };
53 use crate::config::{
54     add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig,
55     UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
56 };
57 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
58 use crate::coredump::GuestDebuggable;
59 use crate::landlock::Landlock;
60 use crate::memory_manager::MemoryManager;
61 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
62 use crate::migration::get_vm_snapshot;
63 use crate::migration::{recv_vm_config, recv_vm_state};
64 use crate::seccomp_filters::{get_seccomp_filter, Thread};
65 use crate::vm::{Error as VmError, Vm, VmState};
66 
67 mod acpi;
68 pub mod api;
69 mod clone3;
70 pub mod config;
71 pub mod console_devices;
72 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
73 mod coredump;
74 pub mod cpu;
75 pub mod device_manager;
76 pub mod device_tree;
77 #[cfg(feature = "guest_debug")]
78 mod gdb;
79 #[cfg(feature = "igvm")]
80 mod igvm;
81 pub mod interrupt;
82 pub mod landlock;
83 pub mod memory_manager;
84 pub mod migration;
85 mod pci_segment;
86 pub mod seccomp_filters;
87 mod serial_manager;
88 mod sigwinch_listener;
89 pub mod vm;
90 pub mod vm_config;
91 
/// Crate-local shorthand for `vm_memory::GuestMemoryMmap` parameterized with [`AtomicBitmap`].
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
/// Crate-local shorthand for `vm_memory::GuestRegionMmap` parameterized with [`AtomicBitmap`].
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
94 
95 /// Errors associated with VMM management
96 #[derive(Debug, Error)]
97 pub enum Error {
98     /// API request receive error
99     #[error("Error receiving API request: {0}")]
100     ApiRequestRecv(#[source] RecvError),
101 
102     /// API response send error
103     #[error("Error sending API request: {0}")]
104     ApiResponseSend(#[source] SendError<ApiResponse>),
105 
106     /// Cannot bind to the UNIX domain socket path
107     #[error("Error binding to UNIX domain socket: {0}")]
108     Bind(#[source] io::Error),
109 
110     /// Cannot clone EventFd.
111     #[error("Error cloning EventFd: {0}")]
112     EventFdClone(#[source] io::Error),
113 
114     /// Cannot create EventFd.
115     #[error("Error creating EventFd: {0}")]
116     EventFdCreate(#[source] io::Error),
117 
118     /// Cannot read from EventFd.
119     #[error("Error reading from EventFd: {0}")]
120     EventFdRead(#[source] io::Error),
121 
122     /// Cannot create epoll context.
123     #[error("Error creating epoll context: {0}")]
124     Epoll(#[source] io::Error),
125 
126     /// Cannot create HTTP thread
127     #[error("Error spawning HTTP thread: {0}")]
128     HttpThreadSpawn(#[source] io::Error),
129 
130     /// Cannot create D-Bus thread
131     #[cfg(feature = "dbus_api")]
132     #[error("Error spawning D-Bus thread: {0}")]
133     DBusThreadSpawn(#[source] io::Error),
134 
135     /// Cannot start D-Bus session
136     #[cfg(feature = "dbus_api")]
137     #[error("Error starting D-Bus session: {0}")]
138     CreateDBusSession(#[source] zbus::Error),
139 
140     /// Cannot create `event-monitor` thread
141     #[error("Error spawning `event-monitor` thread: {0}")]
142     EventMonitorThreadSpawn(#[source] io::Error),
143 
144     /// Cannot handle the VM STDIN stream
145     #[error("Error handling VM stdin: {0:?}")]
146     Stdin(VmError),
147 
148     /// Cannot handle the VM pty stream
149     #[error("Error handling VM pty: {0:?}")]
150     Pty(VmError),
151 
152     /// Cannot reboot the VM
153     #[error("Error rebooting VM: {0:?}")]
154     VmReboot(VmError),
155 
156     /// Cannot create VMM thread
157     #[error("Error spawning VMM thread {0:?}")]
158     VmmThreadSpawn(#[source] io::Error),
159 
160     /// Cannot shut the VMM down
161     #[error("Error shutting down VMM: {0:?}")]
162     VmmShutdown(VmError),
163 
164     /// Cannot create seccomp filter
165     #[error("Error creating seccomp filter: {0}")]
166     CreateSeccompFilter(seccompiler::Error),
167 
168     /// Cannot apply seccomp filter
169     #[error("Error applying seccomp filter: {0}")]
170     ApplySeccompFilter(seccompiler::Error),
171 
172     /// Error activating virtio devices
173     #[error("Error activating virtio devices: {0:?}")]
174     ActivateVirtioDevices(VmError),
175 
176     /// Error creating API server
177     #[error("Error creating API server {0:?}")]
178     CreateApiServer(micro_http::ServerError),
179 
180     /// Error binding API server socket
181     #[error("Error creation API server's socket {0:?}")]
182     CreateApiServerSocket(#[source] io::Error),
183 
184     #[cfg(feature = "guest_debug")]
185     #[error("Failed to start the GDB thread: {0}")]
186     GdbThreadSpawn(io::Error),
187 
188     /// GDB request receive error
189     #[cfg(feature = "guest_debug")]
190     #[error("Error receiving GDB request: {0}")]
191     GdbRequestRecv(#[source] RecvError),
192 
193     /// GDB response send error
194     #[cfg(feature = "guest_debug")]
195     #[error("Error sending GDB request: {0}")]
196     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
197 
198     #[error("Cannot spawn a signal handler thread: {0}")]
199     SignalHandlerSpawn(#[source] io::Error),
200 
201     #[error("Failed to join on threads: {0:?}")]
202     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
203 
204     /// Cannot create Landlock object
205     #[error("Error creating landlock object: {0}")]
206     CreateLandlock(LandlockError),
207 
208     /// Cannot apply landlock based sandboxing
209     #[error("Error applying landlock: {0}")]
210     ApplyLandlock(LandlockError),
211 }
212 pub type Result<T> = result::Result<T, Error>;
213 
/// Token stored as the epoll event data to identify which event source fired.
///
/// The numeric values are what [`EpollContext::add_event`] registers with epoll;
/// any value without a dedicated variant converts to [`EpollDispatch::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    Unknown,
}

impl From<u64> for EpollDispatch {
    /// Maps a raw epoll token back to its dispatch variant.
    fn from(v: u64) -> Self {
        // Every variant that has a wire value; `Unknown` is deliberately absent so
        // that its implicit discriminant still falls through to the default below.
        const KNOWN: [EpollDispatch; 5] = [
            EpollDispatch::Exit,
            EpollDispatch::Reset,
            EpollDispatch::Api,
            EpollDispatch::ActivateVirtioDevices,
            EpollDispatch::Debug,
        ];

        KNOWN
            .iter()
            .copied()
            .find(|dispatch| *dispatch as u64 == v)
            .unwrap_or(EpollDispatch::Unknown)
    }
}
238 
239 pub struct EpollContext {
240     epoll_file: File,
241 }
242 
243 impl EpollContext {
244     pub fn new() -> result::Result<EpollContext, io::Error> {
245         let epoll_fd = epoll::create(true)?;
246         // Use 'File' to enforce closing on 'epoll_fd'
247         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
248         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
249 
250         Ok(EpollContext { epoll_file })
251     }
252 
253     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
254     where
255         T: AsRawFd,
256     {
257         let dispatch_index = token as u64;
258         epoll::ctl(
259             self.epoll_file.as_raw_fd(),
260             epoll::ControlOptions::EPOLL_CTL_ADD,
261             fd.as_raw_fd(),
262             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
263         )?;
264 
265         Ok(())
266     }
267 
268     #[cfg(fuzzing)]
269     pub fn add_event_custom<T>(
270         &mut self,
271         fd: &T,
272         id: u64,
273         evts: epoll::Events,
274     ) -> result::Result<(), io::Error>
275     where
276         T: AsRawFd,
277     {
278         epoll::ctl(
279             self.epoll_file.as_raw_fd(),
280             epoll::ControlOptions::EPOLL_CTL_ADD,
281             fd.as_raw_fd(),
282             epoll::Event::new(evts, id),
283         )?;
284 
285         Ok(())
286     }
287 }
288 
289 impl AsRawFd for EpollContext {
290     fn as_raw_fd(&self) -> RawFd {
291         self.epoll_file.as_raw_fd()
292     }
293 }
294 
295 pub struct PciDeviceInfo {
296     pub id: String,
297     pub bdf: PciBdf,
298 }
299 
300 impl Serialize for PciDeviceInfo {
301     fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
302     where
303         S: Serializer,
304     {
305         let bdf_str = self.bdf.to_string();
306 
307         // Serialize the structure.
308         let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
309         state.serialize_field("id", &self.id)?;
310         state.serialize_field("bdf", &bdf_str)?;
311         state.end()
312     }
313 }
314 
/// Returns the names of the cargo features this build was compiled with.
///
/// Names appear in the fixed order of the table below.
pub fn feature_list() -> Vec<String> {
    // `cfg!` is resolved at compile time, so each row is a constant (enabled, name) pair.
    let table = [
        (cfg!(feature = "dbus_api"), "dbus_api"),
        (cfg!(feature = "dhat-heap"), "dhat-heap"),
        (cfg!(feature = "guest_debug"), "guest_debug"),
        (cfg!(feature = "igvm"), "igvm"),
        (cfg!(feature = "io_uring"), "io_uring"),
        (cfg!(feature = "kvm"), "kvm"),
        (cfg!(feature = "mshv"), "mshv"),
        (cfg!(feature = "sev_snp"), "sev_snp"),
        (cfg!(feature = "tdx"), "tdx"),
        (cfg!(feature = "tracing"), "tracing"),
    ];

    table
        .iter()
        .filter(|&&(enabled, _)| enabled)
        .map(|&(_, name)| name.to_string())
        .collect()
}
339 
340 pub fn start_event_monitor_thread(
341     mut monitor: event_monitor::Monitor,
342     seccomp_action: &SeccompAction,
343     landlock_enable: bool,
344     hypervisor_type: hypervisor::HypervisorType,
345     exit_event: EventFd,
346 ) -> Result<thread::JoinHandle<Result<()>>> {
347     // Retrieve seccomp filter
348     let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
349         .map_err(Error::CreateSeccompFilter)?;
350 
351     thread::Builder::new()
352         .name("event-monitor".to_owned())
353         .spawn(move || {
354             // Apply seccomp filter
355             if !seccomp_filter.is_empty() {
356                 apply_filter(&seccomp_filter)
357                     .map_err(Error::ApplySeccompFilter)
358                     .map_err(|e| {
359                         error!("Error applying seccomp filter: {:?}", e);
360                         exit_event.write(1).ok();
361                         e
362                     })?;
363             }
364             if landlock_enable {
365                 Landlock::new()
366                     .map_err(Error::CreateLandlock)?
367                     .restrict_self()
368                     .map_err(Error::ApplyLandlock)
369                     .map_err(|e| {
370                         error!("Error applying landlock to event monitor thread: {:?}", e);
371                         exit_event.write(1).ok();
372                         e
373                     })?;
374             }
375 
376             std::panic::catch_unwind(AssertUnwindSafe(move || {
377                 while let Ok(event) = monitor.rx.recv() {
378                     let event = Arc::new(event);
379 
380                     if let Some(ref mut file) = monitor.file {
381                         file.write_all(event.as_bytes().as_ref()).ok();
382                         file.write_all(b"\n\n").ok();
383                     }
384 
385                     for tx in monitor.broadcast.iter() {
386                         tx.send(event.clone()).ok();
387                     }
388                 }
389             }))
390             .map_err(|_| {
391                 error!("`event-monitor` thread panicked");
392                 exit_event.write(1).ok();
393             })
394             .ok();
395 
396             Ok(())
397         })
398         .map_err(Error::EventMonitorThreadSpawn)
399 }
400 
/// Spawns the `vmm` thread and the API front-end threads that feed it.
///
/// The `vmm` thread applies its seccomp filter, builds a [`Vmm`], installs the
/// signal handler thread and then runs `Vmm::control_loop` on `api_receiver`
/// (and on the GDB receiver when `guest_debug` is enabled).
///
/// Once that thread has been spawned, this function starts, in order:
/// * the D-Bus API thread, when the `dbus_api` feature is enabled and
///   `dbus_options` is `Some`;
/// * the HTTP API thread, bound to `http_path` if given, otherwise to `http_fd`
///   if given, otherwise no HTTP thread is started;
/// * the `gdb` thread, when the `guest_debug` feature is enabled and
///   `debug_path` is `Some`.
///
/// # Errors
///
/// Returns an error if an `EventFd` cannot be cloned, the VMM seccomp filter
/// cannot be created, or any of the threads fails to start. Failures inside the
/// `vmm` thread are reported through the `JoinHandle` stored in the returned
/// [`VmmThreadHandle`].
// Several parameters/locals are only consumed under specific cargo features.
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    landlock_enable: bool,
) -> Result<VmmThreadHandle> {
    // Everything the GDB stub needs is gathered up front, because `hypervisor`,
    // `debug_event` and `vm_debug_event` are moved into the VMM thread below.
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    // `api_event` itself moves into the VMM thread; the API threads use this clone.
    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        // Shadowed clone: the VMM thread takes this one, the outer `exit_event`
        // stays available for the API threads started afterwards.
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler(landlock_enable)?;

                // Runs until the control loop returns; its result becomes the
                // thread's result.
                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started, we can start the dbus thread
    // and start serving HTTP requests
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            // Only the shutdown channels are kept; the first tuple element is discarded.
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    // A socket path takes precedence over a pre-opened file descriptor.
    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else {
        None
    };

    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        // The join handle is dropped, so the gdb thread is never joined from here.
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}
532 
/// Configuration payload exchanged during live migration.
///
/// The receiving side deserializes it from JSON in `Vmm::vm_receive_config`.
#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    // Configuration of the migrated VM; becomes the receiving VMM's `vm_config`.
    vm_config: Arc<Mutex<VmConfig>>,
    // CPUID entries checked for compatibility on the receiving side (KVM on x86_64 only).
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    // Memory manager snapshot data handed to `MemoryManager::new` on the receiving side.
    memory_manager_data: MemoryManagerSnapshotData,
}
540 
/// Pair of version strings describing the running VMM.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    /// Builds a `VmmVersionInfo` holding owned copies of both strings.
    pub fn new(build_version: &str, version: &str) -> Self {
        let build_version = String::from(build_version);
        let version = String::from(version);

        VmmVersionInfo {
            build_version,
            version,
        }
    }
}
555 
/// Handles returned by `start_vmm_thread` for the threads it started.
pub struct VmmThreadHandle {
    /// Join handle of the `vmm` thread; yields the result of its control loop.
    pub thread_handle: thread::JoinHandle<Result<()>>,
    /// Shutdown channels of the D-Bus API thread, if one was started.
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    /// Handle of the HTTP API thread, if one was started.
    pub http_api_handle: Option<HttpApiHandle>,
}
562 
/// State owned by the `vmm` thread.
pub struct Vmm {
    // Epoll instance on which `Vmm::new` registers the event file descriptors below.
    epoll: EpollContext,
    // Registered as `EpollDispatch::Exit`; also written by the signal handler thread.
    exit_evt: EventFd,
    // Registered as `EpollDispatch::Reset`.
    reset_evt: EventFd,
    // Registered as `EpollDispatch::Api`.
    api_evt: EventFd,
    // Registered as `EpollDispatch::Debug`.
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    // Not registered with epoll; a clone is handed to the `Vm` as its debug event.
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    version: VmmVersionInfo,
    // `None` until a VM exists (e.g. set once a migrated VM has been restored).
    vm: Option<Vm>,
    // `None` until a configuration is known (e.g. set when receiving a migration).
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    // Registered as `EpollDispatch::ActivateVirtioDevices`.
    activate_evt: EventFd,
    // Handle onto the signal iterator created by `setup_signal_handler`.
    signals: Option<Handle>,
    // Join handles of helper threads spawned by the VMM (e.g. the signal handler).
    threads: Vec<thread::JoinHandle<()>>,
    // Terminal attributes the signal handler restores before a forced process exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    console_resize_pipe: Option<Arc<File>>,
    // Result of `pre_create_console_devices`, passed on to the `Vm`.
    console_info: Option<ConsoleInfo>,
}
584 
585 impl Vmm {
    /// Signals registered by `setup_signal_handler` and unblocked by the signal handler thread.
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
587 
588     fn signal_handler(
589         mut signals: Signals,
590         original_termios_opt: Arc<Mutex<Option<termios>>>,
591         exit_evt: &EventFd,
592     ) {
593         for sig in &Self::HANDLED_SIGNALS {
594             unblock_signal(*sig).unwrap();
595         }
596 
597         for signal in signals.forever() {
598             match signal {
599                 SIGTERM | SIGINT => {
600                     if exit_evt.write(1).is_err() {
601                         // Resetting the terminal is usually done as the VMM exits
602                         if let Ok(lock) = original_termios_opt.lock() {
603                             if let Some(termios) = *lock {
604                                 // SAFETY: FFI call
605                                 let _ = unsafe {
606                                     tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
607                                 };
608                             }
609                         } else {
610                             warn!("Failed to lock original termios");
611                         }
612 
613                         std::process::exit(1);
614                     }
615                 }
616                 _ => (),
617             }
618         }
619     }
620 
621     fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
622         let signals = Signals::new(Self::HANDLED_SIGNALS);
623         match signals {
624             Ok(signals) => {
625                 self.signals = Some(signals.handle());
626                 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
627                 let original_termios_opt = Arc::clone(&self.original_termios_opt);
628 
629                 let signal_handler_seccomp_filter = get_seccomp_filter(
630                     &self.seccomp_action,
631                     Thread::SignalHandler,
632                     self.hypervisor.hypervisor_type(),
633                 )
634                 .map_err(Error::CreateSeccompFilter)?;
635                 self.threads.push(
636                     thread::Builder::new()
637                         .name("vmm_signal_handler".to_string())
638                         .spawn(move || {
639                             if !signal_handler_seccomp_filter.is_empty() {
640                                 if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
641                                     .map_err(Error::ApplySeccompFilter)
642                                 {
643                                     error!("Error applying seccomp filter: {:?}", e);
644                                     exit_evt.write(1).ok();
645                                     return;
646                                 }
647                             }
648                             if landlock_enable{
649                                 match Landlock::new() {
650                                     Ok(landlock) => {
651                                         let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
652                                             error!("Error applying Landlock to signal handler thread: {:?}", e);
653                                             exit_evt.write(1).ok();
654                                         });
655                                     }
656                                     Err(e) => {
657                                         error!("Error creating Landlock object: {:?}", e);
658                                         exit_evt.write(1).ok();
659                                     }
660                                 };
661                             }
662 
663                             std::panic::catch_unwind(AssertUnwindSafe(|| {
664                                 Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
665                             }))
666                             .map_err(|_| {
667                                 error!("vmm signal_handler thread panicked");
668                                 exit_evt.write(1).ok()
669                             })
670                             .ok();
671                         })
672                         .map_err(Error::SignalHandlerSpawn)?,
673                 );
674             }
675             Err(e) => error!("Signal not found {}", e),
676         }
677         Ok(())
678     }
679 
680     #[allow(clippy::too_many_arguments)]
681     fn new(
682         vmm_version: VmmVersionInfo,
683         api_evt: EventFd,
684         #[cfg(feature = "guest_debug")] debug_evt: EventFd,
685         #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
686         seccomp_action: SeccompAction,
687         hypervisor: Arc<dyn hypervisor::Hypervisor>,
688         exit_evt: EventFd,
689     ) -> Result<Self> {
690         let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
691         let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
692         let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
693 
694         epoll
695             .add_event(&exit_evt, EpollDispatch::Exit)
696             .map_err(Error::Epoll)?;
697 
698         epoll
699             .add_event(&reset_evt, EpollDispatch::Reset)
700             .map_err(Error::Epoll)?;
701 
702         epoll
703             .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
704             .map_err(Error::Epoll)?;
705 
706         epoll
707             .add_event(&api_evt, EpollDispatch::Api)
708             .map_err(Error::Epoll)?;
709 
710         #[cfg(feature = "guest_debug")]
711         epoll
712             .add_event(&debug_evt, EpollDispatch::Debug)
713             .map_err(Error::Epoll)?;
714 
715         Ok(Vmm {
716             epoll,
717             exit_evt,
718             reset_evt,
719             api_evt,
720             #[cfg(feature = "guest_debug")]
721             debug_evt,
722             #[cfg(feature = "guest_debug")]
723             vm_debug_evt,
724             version: vmm_version,
725             vm: None,
726             vm_config: None,
727             seccomp_action,
728             hypervisor,
729             activate_evt,
730             signals: None,
731             threads: vec![],
732             original_termios_opt: Arc::new(Mutex::new(None)),
733             console_resize_pipe: None,
734             console_info: None,
735         })
736     }
737 
738     fn vm_receive_config<T>(
739         &mut self,
740         req: &Request,
741         socket: &mut T,
742         existing_memory_files: Option<HashMap<u32, File>>,
743     ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
744     where
745         T: Read + Write,
746     {
747         // Read in config data along with memory manager data
748         let mut data: Vec<u8> = Vec::new();
749         data.resize_with(req.length() as usize, Default::default);
750         socket
751             .read_exact(&mut data)
752             .map_err(MigratableError::MigrateSocket)?;
753 
754         let vm_migration_config: VmMigrationConfig =
755             serde_json::from_slice(&data).map_err(|e| {
756                 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
757             })?;
758 
759         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
760         self.vm_check_cpuid_compatibility(
761             &vm_migration_config.vm_config,
762             &vm_migration_config.common_cpuid,
763         )?;
764 
765         let config = vm_migration_config.vm_config.clone();
766         self.vm_config = Some(vm_migration_config.vm_config);
767         self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
768             MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
769         })?);
770 
771         if self
772             .vm_config
773             .as_ref()
774             .unwrap()
775             .lock()
776             .unwrap()
777             .landlock_enable
778         {
779             apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
780                 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
781             })?;
782         }
783 
784         let vm = Vm::create_hypervisor_vm(
785             &self.hypervisor,
786             #[cfg(feature = "tdx")]
787             false,
788             #[cfg(feature = "sev_snp")]
789             false,
790         )
791         .map_err(|e| {
792             MigratableError::MigrateReceive(anyhow!(
793                 "Error creating hypervisor VM from snapshot: {:?}",
794                 e
795             ))
796         })?;
797 
798         let phys_bits =
799             vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);
800 
801         let memory_manager = MemoryManager::new(
802             vm,
803             &config.lock().unwrap().memory.clone(),
804             None,
805             phys_bits,
806             #[cfg(feature = "tdx")]
807             false,
808             Some(&vm_migration_config.memory_manager_data),
809             existing_memory_files,
810             #[cfg(target_arch = "x86_64")]
811             None,
812         )
813         .map_err(|e| {
814             MigratableError::MigrateReceive(anyhow!(
815                 "Error creating MemoryManager from snapshot: {:?}",
816                 e
817             ))
818         })?;
819 
820         Response::ok().write_to(socket)?;
821 
822         Ok(memory_manager)
823     }
824 
825     fn vm_receive_state<T>(
826         &mut self,
827         req: &Request,
828         socket: &mut T,
829         mm: Arc<Mutex<MemoryManager>>,
830     ) -> std::result::Result<(), MigratableError>
831     where
832         T: Read + Write,
833     {
834         // Read in state data
835         let mut data: Vec<u8> = Vec::new();
836         data.resize_with(req.length() as usize, Default::default);
837         socket
838             .read_exact(&mut data)
839             .map_err(MigratableError::MigrateSocket)?;
840         let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
841             MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
842         })?;
843 
844         let exit_evt = self.exit_evt.try_clone().map_err(|e| {
845             MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
846         })?;
847         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
848             MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
849         })?;
850         #[cfg(feature = "guest_debug")]
851         let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
852             MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
853         })?;
854         let activate_evt = self.activate_evt.try_clone().map_err(|e| {
855             MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
856         })?;
857 
858         let timestamp = Instant::now();
859         let hypervisor_vm = mm.lock().unwrap().vm.clone();
860         let mut vm = Vm::new_from_memory_manager(
861             self.vm_config.clone().unwrap(),
862             mm,
863             hypervisor_vm,
864             exit_evt,
865             reset_evt,
866             #[cfg(feature = "guest_debug")]
867             debug_evt,
868             &self.seccomp_action,
869             self.hypervisor.clone(),
870             activate_evt,
871             timestamp,
872             self.console_info.clone(),
873             self.console_resize_pipe.as_ref().map(Arc::clone),
874             Arc::clone(&self.original_termios_opt),
875             Some(snapshot),
876         )
877         .map_err(|e| {
878             MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
879         })?;
880 
881         // Create VM
882         vm.restore().map_err(|e| {
883             Response::error().write_to(socket).ok();
884             MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
885         })?;
886         self.vm = Some(vm);
887 
888         Response::ok().write_to(socket)?;
889 
890         Ok(())
891     }
892 
893     fn vm_receive_memory<T>(
894         &mut self,
895         req: &Request,
896         socket: &mut T,
897         memory_manager: &mut MemoryManager,
898     ) -> std::result::Result<(), MigratableError>
899     where
900         T: Read + ReadVolatile + Write,
901     {
902         // Read table
903         let table = MemoryRangeTable::read_from(socket, req.length())?;
904 
905         // And then read the memory itself
906         memory_manager
907             .receive_memory_regions(&table, socket)
908             .inspect_err(|_| {
909                 Response::error().write_to(socket).ok();
910             })?;
911         Response::ok().write_to(socket)?;
912         Ok(())
913     }
914 
915     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
916         url.strip_prefix("unix:")
917             .ok_or_else(|| {
918                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
919             })
920             .map(|s| s.into())
921     }
922 
923     // Returns true if there were dirty pages to send
924     fn vm_maybe_send_dirty_pages<T>(
925         vm: &mut Vm,
926         socket: &mut T,
927     ) -> result::Result<bool, MigratableError>
928     where
929         T: Read + Write + WriteVolatile,
930     {
931         // Send (dirty) memory table
932         let table = vm.dirty_log()?;
933 
934         // But if there are no regions go straight to pause
935         if table.regions().is_empty() {
936             return Ok(false);
937         }
938 
939         Request::memory(table.length()).write_to(socket).unwrap();
940         table.write_to(socket)?;
941         // And then the memory itself
942         vm.send_memory_regions(&table, socket)?;
943         Response::read_from(socket)?.ok_or_abandon(
944             socket,
945             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
946         )?;
947 
948         Ok(true)
949     }
950 
951     fn send_migration(
952         vm: &mut Vm,
953         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
954             dyn hypervisor::Hypervisor,
955         >,
956         send_data_migration: VmSendMigrationData,
957     ) -> result::Result<(), MigratableError> {
958         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
959         let mut socket = UnixStream::connect(path).map_err(|e| {
960             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
961         })?;
962 
963         // Start the migration
964         Request::start().write_to(&mut socket)?;
965         Response::read_from(&mut socket)?.ok_or_abandon(
966             &mut socket,
967             MigratableError::MigrateSend(anyhow!("Error starting migration")),
968         )?;
969 
970         // Send config
971         let vm_config = vm.get_config();
972         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
973         let common_cpuid = {
974             #[cfg(feature = "tdx")]
975             if vm_config.lock().unwrap().is_tdx_enabled() {
976                 return Err(MigratableError::MigrateSend(anyhow!(
977                     "Live Migration is not supported when TDX is enabled"
978                 )));
979             };
980 
981             let amx = vm_config.lock().unwrap().cpus.features.amx;
982             let phys_bits =
983                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
984             arch::generate_common_cpuid(
985                 &hypervisor,
986                 &arch::CpuidConfig {
987                     sgx_epc_sections: None,
988                     phys_bits,
989                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
990                     #[cfg(feature = "tdx")]
991                     tdx: false,
992                     amx,
993                 },
994             )
995             .map_err(|e| {
996                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
997             })?
998         };
999 
1000         if send_data_migration.local {
1001             vm.send_memory_fds(&mut socket)?;
1002         }
1003 
1004         let vm_migration_config = VmMigrationConfig {
1005             vm_config,
1006             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1007             common_cpuid,
1008             memory_manager_data: vm.memory_manager_data(),
1009         };
1010         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1011         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1012         socket
1013             .write_all(&config_data)
1014             .map_err(MigratableError::MigrateSocket)?;
1015         Response::read_from(&mut socket)?.ok_or_abandon(
1016             &mut socket,
1017             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1018         )?;
1019 
1020         // Let every Migratable object know about the migration being started.
1021         vm.start_migration()?;
1022 
1023         if send_data_migration.local {
1024             // Now pause VM
1025             vm.pause()?;
1026         } else {
1027             // Start logging dirty pages
1028             vm.start_dirty_log()?;
1029 
1030             // Send memory table
1031             let table = vm.memory_range_table()?;
1032             Request::memory(table.length())
1033                 .write_to(&mut socket)
1034                 .unwrap();
1035             table.write_to(&mut socket)?;
1036             // And then the memory itself
1037             vm.send_memory_regions(&table, &mut socket)?;
1038             Response::read_from(&mut socket)?.ok_or_abandon(
1039                 &mut socket,
1040                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1041             )?;
1042 
1043             // Try at most 5 passes of dirty memory sending
1044             const MAX_DIRTY_MIGRATIONS: usize = 5;
1045             for i in 0..MAX_DIRTY_MIGRATIONS {
1046                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1047                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1048                     break;
1049                 }
1050             }
1051 
1052             // Now pause VM
1053             vm.pause()?;
1054 
1055             // Send last batch of dirty pages
1056             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1057 
1058             // Stop logging dirty pages
1059             vm.stop_dirty_log()?;
1060         }
1061         // Capture snapshot and send it
1062         let vm_snapshot = vm.snapshot()?;
1063         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1064         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1065         socket
1066             .write_all(&snapshot_data)
1067             .map_err(MigratableError::MigrateSocket)?;
1068         Response::read_from(&mut socket)?.ok_or_abandon(
1069             &mut socket,
1070             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1071         )?;
1072         // Complete the migration
1073         Request::complete().write_to(&mut socket)?;
1074         Response::read_from(&mut socket)?.ok_or_abandon(
1075             &mut socket,
1076             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1077         )?;
1078 
1079         info!("Migration complete");
1080 
1081         // Let every Migratable object know about the migration being complete
1082         vm.complete_migration()
1083     }
1084 
1085     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1086     fn vm_check_cpuid_compatibility(
1087         &self,
1088         src_vm_config: &Arc<Mutex<VmConfig>>,
1089         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1090     ) -> result::Result<(), MigratableError> {
1091         #[cfg(feature = "tdx")]
1092         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1093             return Err(MigratableError::MigrateReceive(anyhow!(
1094                 "Live Migration is not supported when TDX is enabled"
1095             )));
1096         };
1097 
1098         // We check the `CPUID` compatibility of between the source vm and destination, which is
1099         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1100         let dest_cpuid = &{
1101             let vm_config = &src_vm_config.lock().unwrap();
1102 
1103             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1104             arch::generate_common_cpuid(
1105                 &self.hypervisor.clone(),
1106                 &arch::CpuidConfig {
1107                     sgx_epc_sections: None,
1108                     phys_bits,
1109                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1110                     #[cfg(feature = "tdx")]
1111                     tdx: false,
1112                     amx: vm_config.cpus.features.amx,
1113                 },
1114             )
1115             .map_err(|e| {
1116                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1117             })?
1118         };
1119         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1120             MigratableError::MigrateReceive(anyhow!(
1121                 "Error checking cpu feature compatibility': {:?}",
1122                 e
1123             ))
1124         })
1125     }
1126 
    /// Runs the VMM event loop until an exit is requested.
    ///
    /// The loop blocks in `epoll::wait` on `self.epoll` and dispatches every
    /// returned event according to its `EpollDispatch` value. It ends when an
    /// exit event is handled or when an API request returns `true`. After the
    /// loop, the signal handle (if any) is closed and all threads stored in
    /// `self.threads` are joined.
    ///
    /// # Errors
    ///
    /// Returns an `Error` when epoll fails with anything other than an
    /// interruption, when reading an eventfd or receiving from a channel
    /// fails, when a dispatched operation fails, or when joining a thread
    /// fails.
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        // Maximum number of events fetched by a single epoll wait.
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            // A timeout of -1 is passed to the wait call.
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            // Only the first `num_events` entries were filled in by this wait.
            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        // Leave the event loop altogether.
                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        // The eventfd is only read when a VM exists.
                        if let Some(ref vm) = self.vm {
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events. The value read from the eventfd
                        // is the number of requests taken from the channel.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            // A request returning `true` asks the loop to stop.
                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the GDB receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            // Without a VM the request is answered with
                            // `VmError::VmNotRunning`.
                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    // Debug events are ignored when `guest_debug` is disabled.
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
1236 }
1237 
1238 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
1239     vm_config.lock().unwrap().apply_landlock()?;
1240     Ok(())
1241 }
1242 
1243 impl RequestHandler for Vmm {
1244     fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> {
1245         // We only store the passed VM config.
1246         // The VM will be created when being asked to boot it.
1247         if self.vm_config.is_none() {
1248             self.vm_config = Some(Arc::new(Mutex::new(*config)));
1249             self.console_info =
1250                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1251 
1252             if self
1253                 .vm_config
1254                 .as_ref()
1255                 .unwrap()
1256                 .lock()
1257                 .unwrap()
1258                 .landlock_enable
1259             {
1260                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1261                     .map_err(VmError::ApplyLandlock)?;
1262             }
1263             Ok(())
1264         } else {
1265             Err(VmError::VmAlreadyCreated)
1266         }
1267     }
1268 
1269     fn vm_boot(&mut self) -> result::Result<(), VmError> {
1270         tracer::start();
1271         info!("Booting VM");
1272         event!("vm", "booting");
1273         let r = {
1274             trace_scoped!("vm_boot");
1275             // If we don't have a config, we cannot boot a VM.
1276             if self.vm_config.is_none() {
1277                 return Err(VmError::VmMissingConfig);
1278             };
1279 
1280             // console_info is set to None in vm_shutdown. re-populate here if empty
1281             if self.console_info.is_none() {
1282                 self.console_info =
1283                     Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1284             }
1285 
1286             // Create a new VM if we don't have one yet.
1287             if self.vm.is_none() {
1288                 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1289                 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1290                 #[cfg(feature = "guest_debug")]
1291                 let vm_debug_evt = self
1292                     .vm_debug_evt
1293                     .try_clone()
1294                     .map_err(VmError::EventFdClone)?;
1295                 let activate_evt = self
1296                     .activate_evt
1297                     .try_clone()
1298                     .map_err(VmError::EventFdClone)?;
1299 
1300                 if let Some(ref vm_config) = self.vm_config {
1301                     let vm = Vm::new(
1302                         Arc::clone(vm_config),
1303                         exit_evt,
1304                         reset_evt,
1305                         #[cfg(feature = "guest_debug")]
1306                         vm_debug_evt,
1307                         &self.seccomp_action,
1308                         self.hypervisor.clone(),
1309                         activate_evt,
1310                         self.console_info.clone(),
1311                         self.console_resize_pipe.as_ref().map(Arc::clone),
1312                         Arc::clone(&self.original_termios_opt),
1313                         None,
1314                         None,
1315                         None,
1316                     )?;
1317 
1318                     self.vm = Some(vm);
1319                 }
1320             }
1321 
1322             // Now we can boot the VM.
1323             if let Some(ref mut vm) = self.vm {
1324                 vm.boot()
1325             } else {
1326                 Err(VmError::VmNotCreated)
1327             }
1328         };
1329         tracer::end();
1330         if r.is_ok() {
1331             event!("vm", "booted");
1332         }
1333         r
1334     }
1335 
1336     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1337         if let Some(ref mut vm) = self.vm {
1338             vm.pause().map_err(VmError::Pause)
1339         } else {
1340             Err(VmError::VmNotRunning)
1341         }
1342     }
1343 
1344     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1345         if let Some(ref mut vm) = self.vm {
1346             vm.resume().map_err(VmError::Resume)
1347         } else {
1348             Err(VmError::VmNotRunning)
1349         }
1350     }
1351 
1352     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1353         if let Some(ref mut vm) = self.vm {
1354             // Drain console_info so that FDs are not reused
1355             let _ = self.console_info.take();
1356             vm.snapshot()
1357                 .map_err(VmError::Snapshot)
1358                 .and_then(|snapshot| {
1359                     vm.send(&snapshot, destination_url)
1360                         .map_err(VmError::SnapshotSend)
1361                 })
1362         } else {
1363             Err(VmError::VmNotRunning)
1364         }
1365     }
1366 
1367     fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
1368         if self.vm.is_some() || self.vm_config.is_some() {
1369             return Err(VmError::VmAlreadyCreated);
1370         }
1371 
1372         let source_url = restore_cfg.source_url.as_path().to_str();
1373         if source_url.is_none() {
1374             return Err(VmError::InvalidRestoreSourceUrl);
1375         }
1376         // Safe to unwrap as we checked it was Some(&str).
1377         let source_url = source_url.unwrap();
1378 
1379         let vm_config = Arc::new(Mutex::new(
1380             recv_vm_config(source_url).map_err(VmError::Restore)?,
1381         ));
1382         restore_cfg
1383             .validate(&vm_config.lock().unwrap().clone())
1384             .map_err(VmError::ConfigValidation)?;
1385 
1386         // Update VM's net configurations with new fds received for restore operation
1387         if let (Some(restored_nets), Some(vm_net_configs)) =
1388             (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
1389         {
1390             for net in restored_nets.iter() {
1391                 for net_config in vm_net_configs.iter_mut() {
1392                     // update only if the net dev is backed by FDs
1393                     if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
1394                         net_config.fds.clone_from(&net.fds);
1395                     }
1396                 }
1397             }
1398         }
1399 
1400         let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
1401         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1402         let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;
1403 
1404         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1405         self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
1406             .map_err(VmError::Restore)?;
1407 
1408         self.vm_config = Some(Arc::clone(&vm_config));
1409 
1410         // console_info is set to None in vm_snapshot. re-populate here if empty
1411         if self.console_info.is_none() {
1412             self.console_info =
1413                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1414         }
1415 
1416         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1417         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1418         #[cfg(feature = "guest_debug")]
1419         let debug_evt = self
1420             .vm_debug_evt
1421             .try_clone()
1422             .map_err(VmError::EventFdClone)?;
1423         let activate_evt = self
1424             .activate_evt
1425             .try_clone()
1426             .map_err(VmError::EventFdClone)?;
1427 
1428         let vm = Vm::new(
1429             vm_config,
1430             exit_evt,
1431             reset_evt,
1432             #[cfg(feature = "guest_debug")]
1433             debug_evt,
1434             &self.seccomp_action,
1435             self.hypervisor.clone(),
1436             activate_evt,
1437             self.console_info.clone(),
1438             self.console_resize_pipe.as_ref().map(Arc::clone),
1439             Arc::clone(&self.original_termios_opt),
1440             Some(snapshot),
1441             Some(source_url),
1442             Some(restore_cfg.prefault),
1443         )?;
1444         self.vm = Some(vm);
1445 
1446         if self
1447             .vm_config
1448             .as_ref()
1449             .unwrap()
1450             .lock()
1451             .unwrap()
1452             .landlock_enable
1453         {
1454             apply_landlock(self.vm_config.as_ref().unwrap().clone())
1455                 .map_err(VmError::ApplyLandlock)?;
1456         }
1457 
1458         // Now we can restore the rest of the VM.
1459         if let Some(ref mut vm) = self.vm {
1460             vm.restore()
1461         } else {
1462             Err(VmError::VmNotCreated)
1463         }
1464     }
1465 
1466     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1467     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1468         if let Some(ref mut vm) = self.vm {
1469             vm.coredump(destination_url).map_err(VmError::Coredump)
1470         } else {
1471             Err(VmError::VmNotRunning)
1472         }
1473     }
1474 
1475     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1476         let r = if let Some(ref mut vm) = self.vm.take() {
1477             // Drain console_info so that the FDs are not reused
1478             let _ = self.console_info.take();
1479             vm.shutdown()
1480         } else {
1481             Err(VmError::VmNotRunning)
1482         };
1483 
1484         if r.is_ok() {
1485             event!("vm", "shutdown");
1486         }
1487 
1488         r
1489     }
1490 
1491     fn vm_reboot(&mut self) -> result::Result<(), VmError> {
1492         event!("vm", "rebooting");
1493 
1494         // First we stop the current VM
1495         let config = if let Some(mut vm) = self.vm.take() {
1496             let config = vm.get_config();
1497             vm.shutdown()?;
1498             config
1499         } else {
1500             return Err(VmError::VmNotCreated);
1501         };
1502 
1503         // vm.shutdown() closes all the console devices, so set console_info to None
1504         // so that the closed FD #s are not reused.
1505         let _ = self.console_info.take();
1506 
1507         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1508         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1509         #[cfg(feature = "guest_debug")]
1510         let debug_evt = self
1511             .vm_debug_evt
1512             .try_clone()
1513             .map_err(VmError::EventFdClone)?;
1514         let activate_evt = self
1515             .activate_evt
1516             .try_clone()
1517             .map_err(VmError::EventFdClone)?;
1518 
1519         // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
1520         // an event sitting in the shared reset_evt. Without doing this we get very early reboots
1521         // during the boot process.
1522         if self.reset_evt.read().is_ok() {
1523             warn!("Spurious second reset event received. Ignoring.");
1524         }
1525 
1526         self.console_info =
1527             Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1528 
1529         // Then we create the new VM
1530         let mut vm = Vm::new(
1531             config,
1532             exit_evt,
1533             reset_evt,
1534             #[cfg(feature = "guest_debug")]
1535             debug_evt,
1536             &self.seccomp_action,
1537             self.hypervisor.clone(),
1538             activate_evt,
1539             self.console_info.clone(),
1540             self.console_resize_pipe.as_ref().map(Arc::clone),
1541             Arc::clone(&self.original_termios_opt),
1542             None,
1543             None,
1544             None,
1545         )?;
1546 
1547         // And we boot it
1548         vm.boot()?;
1549 
1550         self.vm = Some(vm);
1551 
1552         event!("vm", "rebooted");
1553 
1554         Ok(())
1555     }
1556 
1557     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1558         match &self.vm_config {
1559             Some(vm_config) => {
1560                 let state = match &self.vm {
1561                     Some(vm) => vm.get_state()?,
1562                     None => VmState::Created,
1563                 };
1564                 let config = vm_config.lock().unwrap().clone();
1565 
1566                 let mut memory_actual_size = config.memory.total_size();
1567                 if let Some(vm) = &self.vm {
1568                     memory_actual_size -= vm.balloon_size();
1569                 }
1570 
1571                 let device_tree = self
1572                     .vm
1573                     .as_ref()
1574                     .map(|vm| vm.device_tree().lock().unwrap().clone());
1575 
1576                 Ok(VmInfoResponse {
1577                     config: Box::new(config),
1578                     state,
1579                     memory_actual_size,
1580                     device_tree,
1581                 })
1582             }
1583             None => Err(VmError::VmNotCreated),
1584         }
1585     }
1586 
1587     fn vmm_ping(&self) -> VmmPingResponse {
1588         let VmmVersionInfo {
1589             build_version,
1590             version,
1591         } = self.version.clone();
1592 
1593         VmmPingResponse {
1594             build_version,
1595             version,
1596             pid: std::process::id() as i64,
1597             features: feature_list(),
1598         }
1599     }
1600 
1601     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1602         if self.vm_config.is_none() {
1603             return Ok(());
1604         }
1605 
1606         // If a VM is booted, we first try to shut it down.
1607         if self.vm.is_some() {
1608             self.vm_shutdown()?;
1609         }
1610 
1611         self.vm_config = None;
1612 
1613         event!("vm", "deleted");
1614 
1615         Ok(())
1616     }
1617 
1618     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1619         self.vm_delete()?;
1620         event!("vmm", "shutdown");
1621         Ok(())
1622     }
1623 
1624     fn vm_resize(
1625         &mut self,
1626         desired_vcpus: Option<u8>,
1627         desired_ram: Option<u64>,
1628         desired_balloon: Option<u64>,
1629     ) -> result::Result<(), VmError> {
1630         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1631 
1632         if let Some(ref mut vm) = self.vm {
1633             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1634                 error!("Error when resizing VM: {:?}", e);
1635                 Err(e)
1636             } else {
1637                 Ok(())
1638             }
1639         } else {
1640             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1641             if let Some(desired_vcpus) = desired_vcpus {
1642                 config.cpus.boot_vcpus = desired_vcpus;
1643             }
1644             if let Some(desired_ram) = desired_ram {
1645                 config.memory.size = desired_ram;
1646             }
1647             if let Some(desired_balloon) = desired_balloon {
1648                 if let Some(balloon_config) = &mut config.balloon {
1649                     balloon_config.size = desired_balloon;
1650                 }
1651             }
1652             Ok(())
1653         }
1654     }
1655 
1656     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1657         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1658 
1659         if let Some(ref mut vm) = self.vm {
1660             if let Err(e) = vm.resize_zone(id, desired_ram) {
1661                 error!("Error when resizing VM: {:?}", e);
1662                 Err(e)
1663             } else {
1664                 Ok(())
1665             }
1666         } else {
1667             // Update VmConfig by setting the new desired ram.
1668             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1669 
1670             if let Some(zones) = &mut memory_config.zones {
1671                 for zone in zones.iter_mut() {
1672                     if zone.id == id {
1673                         zone.size = desired_ram;
1674                         return Ok(());
1675                     }
1676                 }
1677             }
1678 
1679             error!("Could not find the memory zone {} for the resize", id);
1680             Err(VmError::ResizeZone)
1681         }
1682     }
1683 
1684     fn vm_add_device(
1685         &mut self,
1686         device_cfg: DeviceConfig,
1687     ) -> result::Result<Option<Vec<u8>>, VmError> {
1688         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1689 
1690         {
1691             // Validate the configuration change in a cloned configuration
1692             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1693             add_to_config(&mut config.devices, device_cfg.clone());
1694             config.validate().map_err(VmError::ConfigValidation)?;
1695         }
1696 
1697         if let Some(ref mut vm) = self.vm {
1698             let info = vm.add_device(device_cfg).map_err(|e| {
1699                 error!("Error when adding new device to the VM: {:?}", e);
1700                 e
1701             })?;
1702             serde_json::to_vec(&info)
1703                 .map(Some)
1704                 .map_err(VmError::SerializeJson)
1705         } else {
1706             // Update VmConfig by adding the new device.
1707             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1708             add_to_config(&mut config.devices, device_cfg);
1709             Ok(None)
1710         }
1711     }
1712 
1713     fn vm_add_user_device(
1714         &mut self,
1715         device_cfg: UserDeviceConfig,
1716     ) -> result::Result<Option<Vec<u8>>, VmError> {
1717         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1718 
1719         {
1720             // Validate the configuration change in a cloned configuration
1721             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1722             add_to_config(&mut config.user_devices, device_cfg.clone());
1723             config.validate().map_err(VmError::ConfigValidation)?;
1724         }
1725 
1726         if let Some(ref mut vm) = self.vm {
1727             let info = vm.add_user_device(device_cfg).map_err(|e| {
1728                 error!("Error when adding new user device to the VM: {:?}", e);
1729                 e
1730             })?;
1731             serde_json::to_vec(&info)
1732                 .map(Some)
1733                 .map_err(VmError::SerializeJson)
1734         } else {
1735             // Update VmConfig by adding the new device.
1736             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1737             add_to_config(&mut config.user_devices, device_cfg);
1738             Ok(None)
1739         }
1740     }
1741 
1742     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1743         if let Some(ref mut vm) = self.vm {
1744             if let Err(e) = vm.remove_device(id) {
1745                 error!("Error when removing device from the VM: {:?}", e);
1746                 Err(e)
1747             } else {
1748                 Ok(())
1749             }
1750         } else if let Some(ref config) = self.vm_config {
1751             let mut config = config.lock().unwrap();
1752             if config.remove_device(&id) {
1753                 Ok(())
1754             } else {
1755                 Err(VmError::NoDeviceToRemove(id))
1756             }
1757         } else {
1758             Err(VmError::VmNotCreated)
1759         }
1760     }
1761 
1762     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1763         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1764 
1765         {
1766             // Validate the configuration change in a cloned configuration
1767             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1768             add_to_config(&mut config.disks, disk_cfg.clone());
1769             config.validate().map_err(VmError::ConfigValidation)?;
1770         }
1771 
1772         if let Some(ref mut vm) = self.vm {
1773             let info = vm.add_disk(disk_cfg).map_err(|e| {
1774                 error!("Error when adding new disk to the VM: {:?}", e);
1775                 e
1776             })?;
1777             serde_json::to_vec(&info)
1778                 .map(Some)
1779                 .map_err(VmError::SerializeJson)
1780         } else {
1781             // Update VmConfig by adding the new device.
1782             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1783             add_to_config(&mut config.disks, disk_cfg);
1784             Ok(None)
1785         }
1786     }
1787 
1788     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1789         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1790 
1791         {
1792             // Validate the configuration change in a cloned configuration
1793             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1794             add_to_config(&mut config.fs, fs_cfg.clone());
1795             config.validate().map_err(VmError::ConfigValidation)?;
1796         }
1797 
1798         if let Some(ref mut vm) = self.vm {
1799             let info = vm.add_fs(fs_cfg).map_err(|e| {
1800                 error!("Error when adding new fs to the VM: {:?}", e);
1801                 e
1802             })?;
1803             serde_json::to_vec(&info)
1804                 .map(Some)
1805                 .map_err(VmError::SerializeJson)
1806         } else {
1807             // Update VmConfig by adding the new device.
1808             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1809             add_to_config(&mut config.fs, fs_cfg);
1810             Ok(None)
1811         }
1812     }
1813 
1814     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1815         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1816 
1817         {
1818             // Validate the configuration change in a cloned configuration
1819             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1820             add_to_config(&mut config.pmem, pmem_cfg.clone());
1821             config.validate().map_err(VmError::ConfigValidation)?;
1822         }
1823 
1824         if let Some(ref mut vm) = self.vm {
1825             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1826                 error!("Error when adding new pmem device to the VM: {:?}", e);
1827                 e
1828             })?;
1829             serde_json::to_vec(&info)
1830                 .map(Some)
1831                 .map_err(VmError::SerializeJson)
1832         } else {
1833             // Update VmConfig by adding the new device.
1834             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1835             add_to_config(&mut config.pmem, pmem_cfg);
1836             Ok(None)
1837         }
1838     }
1839 
1840     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1841         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1842 
1843         {
1844             // Validate the configuration change in a cloned configuration
1845             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1846             add_to_config(&mut config.net, net_cfg.clone());
1847             config.validate().map_err(VmError::ConfigValidation)?;
1848         }
1849 
1850         if let Some(ref mut vm) = self.vm {
1851             let info = vm.add_net(net_cfg).map_err(|e| {
1852                 error!("Error when adding new network device to the VM: {:?}", e);
1853                 e
1854             })?;
1855             serde_json::to_vec(&info)
1856                 .map(Some)
1857                 .map_err(VmError::SerializeJson)
1858         } else {
1859             // Update VmConfig by adding the new device.
1860             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1861             add_to_config(&mut config.net, net_cfg);
1862             Ok(None)
1863         }
1864     }
1865 
1866     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1867         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1868 
1869         {
1870             // Validate the configuration change in a cloned configuration
1871             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1872             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1873             config.validate().map_err(VmError::ConfigValidation)?;
1874         }
1875 
1876         if let Some(ref mut vm) = self.vm {
1877             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1878                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1879                 e
1880             })?;
1881             serde_json::to_vec(&info)
1882                 .map(Some)
1883                 .map_err(VmError::SerializeJson)
1884         } else {
1885             // Update VmConfig by adding the new device.
1886             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1887             add_to_config(&mut config.vdpa, vdpa_cfg);
1888             Ok(None)
1889         }
1890     }
1891 
1892     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1893         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1894 
1895         {
1896             // Validate the configuration change in a cloned configuration
1897             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1898 
1899             if config.vsock.is_some() {
1900                 return Err(VmError::TooManyVsockDevices);
1901             }
1902 
1903             config.vsock = Some(vsock_cfg.clone());
1904             config.validate().map_err(VmError::ConfigValidation)?;
1905         }
1906 
1907         if let Some(ref mut vm) = self.vm {
1908             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1909                 error!("Error when adding new vsock device to the VM: {:?}", e);
1910                 e
1911             })?;
1912             serde_json::to_vec(&info)
1913                 .map(Some)
1914                 .map_err(VmError::SerializeJson)
1915         } else {
1916             // Update VmConfig by adding the new device.
1917             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1918             config.vsock = Some(vsock_cfg);
1919             Ok(None)
1920         }
1921     }
1922 
1923     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1924         if let Some(ref mut vm) = self.vm {
1925             let info = vm.counters().map_err(|e| {
1926                 error!("Error when getting counters from the VM: {:?}", e);
1927                 e
1928             })?;
1929             serde_json::to_vec(&info)
1930                 .map(Some)
1931                 .map_err(VmError::SerializeJson)
1932         } else {
1933             Err(VmError::VmNotRunning)
1934         }
1935     }
1936 
1937     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1938         if let Some(ref mut vm) = self.vm {
1939             vm.power_button()
1940         } else {
1941             Err(VmError::VmNotRunning)
1942         }
1943     }
1944 
1945     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1946         if let Some(ref mut vm) = self.vm {
1947             vm.nmi()
1948         } else {
1949             Err(VmError::VmNotRunning)
1950         }
1951     }
1952 
1953     fn vm_receive_migration(
1954         &mut self,
1955         receive_data_migration: VmReceiveMigrationData,
1956     ) -> result::Result<(), MigratableError> {
1957         info!(
1958             "Receiving migration: receiver_url = {}",
1959             receive_data_migration.receiver_url
1960         );
1961 
1962         let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
1963         let listener = UnixListener::bind(&path).map_err(|e| {
1964             MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
1965         })?;
1966         let (mut socket, _addr) = listener.accept().map_err(|e| {
1967             MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
1968         })?;
1969         std::fs::remove_file(&path).map_err(|e| {
1970             MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
1971         })?;
1972 
1973         let mut started = false;
1974         let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
1975         let mut existing_memory_files = None;
1976         loop {
1977             let req = Request::read_from(&mut socket)?;
1978             match req.command() {
1979                 Command::Invalid => info!("Invalid Command Received"),
1980                 Command::Start => {
1981                     info!("Start Command Received");
1982                     started = true;
1983 
1984                     Response::ok().write_to(&mut socket)?;
1985                 }
1986                 Command::Config => {
1987                     info!("Config Command Received");
1988 
1989                     if !started {
1990                         warn!("Migration not started yet");
1991                         Response::error().write_to(&mut socket)?;
1992                         continue;
1993                     }
1994                     memory_manager = Some(self.vm_receive_config(
1995                         &req,
1996                         &mut socket,
1997                         existing_memory_files.take(),
1998                     )?);
1999                 }
2000                 Command::State => {
2001                     info!("State Command Received");
2002 
2003                     if !started {
2004                         warn!("Migration not started yet");
2005                         Response::error().write_to(&mut socket)?;
2006                         continue;
2007                     }
2008                     if let Some(mm) = memory_manager.take() {
2009                         self.vm_receive_state(&req, &mut socket, mm)?;
2010                     } else {
2011                         warn!("Configuration not sent yet");
2012                         Response::error().write_to(&mut socket)?;
2013                     }
2014                 }
2015                 Command::Memory => {
2016                     info!("Memory Command Received");
2017 
2018                     if !started {
2019                         warn!("Migration not started yet");
2020                         Response::error().write_to(&mut socket)?;
2021                         continue;
2022                     }
2023                     if let Some(mm) = memory_manager.as_ref() {
2024                         self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
2025                     } else {
2026                         warn!("Configuration not sent yet");
2027                         Response::error().write_to(&mut socket)?;
2028                     }
2029                 }
2030                 Command::MemoryFd => {
2031                     info!("MemoryFd Command Received");
2032 
2033                     if !started {
2034                         warn!("Migration not started yet");
2035                         Response::error().write_to(&mut socket)?;
2036                         continue;
2037                     }
2038 
2039                     let mut buf = [0u8; 4];
2040                     let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
2041                         MigratableError::MigrateReceive(anyhow!(
2042                             "Error receiving slot from socket: {}",
2043                             e
2044                         ))
2045                     })?;
2046 
2047                     if existing_memory_files.is_none() {
2048                         existing_memory_files = Some(HashMap::default())
2049                     }
2050 
2051                     if let Some(ref mut existing_memory_files) = existing_memory_files {
2052                         let slot = u32::from_le_bytes(buf);
2053                         existing_memory_files.insert(slot, file.unwrap());
2054                     }
2055 
2056                     Response::ok().write_to(&mut socket)?;
2057                 }
2058                 Command::Complete => {
2059                     info!("Complete Command Received");
2060                     if let Some(ref mut vm) = self.vm.as_mut() {
2061                         vm.resume()?;
2062                         Response::ok().write_to(&mut socket)?;
2063                     } else {
2064                         warn!("VM not created yet");
2065                         Response::error().write_to(&mut socket)?;
2066                     }
2067                     break;
2068                 }
2069                 Command::Abandon => {
2070                     info!("Abandon Command Received");
2071                     self.vm = None;
2072                     self.vm_config = None;
2073                     Response::ok().write_to(&mut socket).ok();
2074                     break;
2075                 }
2076             }
2077         }
2078 
2079         Ok(())
2080     }
2081 
2082     fn vm_send_migration(
2083         &mut self,
2084         send_data_migration: VmSendMigrationData,
2085     ) -> result::Result<(), MigratableError> {
2086         info!(
2087             "Sending migration: destination_url = {}, local = {}",
2088             send_data_migration.destination_url, send_data_migration.local
2089         );
2090 
2091         if !self
2092             .vm_config
2093             .as_ref()
2094             .unwrap()
2095             .lock()
2096             .unwrap()
2097             .backed_by_shared_memory()
2098             && send_data_migration.local
2099         {
2100             return Err(MigratableError::MigrateSend(anyhow!(
2101                 "Local migration requires shared memory or hugepages enabled"
2102             )));
2103         }
2104 
2105         if let Some(vm) = self.vm.as_mut() {
2106             Self::send_migration(
2107                 vm,
2108                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2109                 self.hypervisor.clone(),
2110                 send_data_migration,
2111             )
2112             .map_err(|migration_err| {
2113                 error!("Migration failed: {:?}", migration_err);
2114 
2115                 // Stop logging dirty pages
2116                 if let Err(e) = vm.stop_dirty_log() {
2117                     return e;
2118                 }
2119 
2120                 if vm.get_state().unwrap() == VmState::Paused {
2121                     if let Err(e) = vm.resume() {
2122                         return e;
2123                     }
2124                 }
2125 
2126                 migration_err
2127             })?;
2128 
2129             // Shutdown the VM after the migration succeeded
2130             self.exit_evt.write(1).map_err(|e| {
2131                 MigratableError::MigrateSend(anyhow!(
2132                     "Failed shutting down the VM after migration: {:?}",
2133                     e
2134                 ))
2135             })
2136         } else {
2137             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2138         }
2139     }
2140 }
2141 
// Identifier strings for the CPU, memory and device managers.
// NOTE(review): presumably used to name each manager's section inside a VM
// snapshot; the users are not visible in this part of the file, so confirm.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2145 
2146 #[cfg(test)]
2147 mod unit_tests {
2148     use config::{
2149         ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig,
2150         RngConfig,
2151     };
2152 
2153     use super::*;
2154     #[cfg(target_arch = "x86_64")]
2155     use crate::config::DebugConsoleConfig;
2156 
2157     fn create_dummy_vmm() -> Vmm {
2158         Vmm::new(
2159             VmmVersionInfo::new("dummy", "dummy"),
2160             EventFd::new(EFD_NONBLOCK).unwrap(),
2161             #[cfg(feature = "guest_debug")]
2162             EventFd::new(EFD_NONBLOCK).unwrap(),
2163             #[cfg(feature = "guest_debug")]
2164             EventFd::new(EFD_NONBLOCK).unwrap(),
2165             SeccompAction::Allow,
2166             hypervisor::new().unwrap(),
2167             EventFd::new(EFD_NONBLOCK).unwrap(),
2168         )
2169         .unwrap()
2170     }
2171 
2172     fn create_dummy_vm_config() -> Box<VmConfig> {
2173         Box::new(VmConfig {
2174             cpus: CpusConfig {
2175                 boot_vcpus: 1,
2176                 max_vcpus: 1,
2177                 topology: None,
2178                 kvm_hyperv: false,
2179                 max_phys_bits: 46,
2180                 affinity: None,
2181                 features: config::CpuFeatures::default(),
2182             },
2183             memory: MemoryConfig {
2184                 size: 536_870_912,
2185                 mergeable: false,
2186                 hotplug_method: HotplugMethod::Acpi,
2187                 hotplug_size: None,
2188                 hotplugged_size: None,
2189                 shared: true,
2190                 hugepages: false,
2191                 hugepage_size: None,
2192                 prefault: false,
2193                 zones: None,
2194                 thp: true,
2195             },
2196             payload: Some(PayloadConfig {
2197                 kernel: Some(PathBuf::from("/path/to/kernel")),
2198                 firmware: None,
2199                 cmdline: None,
2200                 initramfs: None,
2201                 #[cfg(feature = "igvm")]
2202                 igvm: None,
2203                 #[cfg(feature = "sev_snp")]
2204                 host_data: None,
2205             }),
2206             rate_limit_groups: None,
2207             disks: None,
2208             net: None,
2209             rng: RngConfig {
2210                 src: PathBuf::from("/dev/urandom"),
2211                 iommu: false,
2212             },
2213             balloon: None,
2214             fs: None,
2215             pmem: None,
2216             serial: ConsoleConfig {
2217                 file: None,
2218                 mode: ConsoleOutputMode::Null,
2219                 iommu: false,
2220                 socket: None,
2221             },
2222             console: ConsoleConfig {
2223                 file: None,
2224                 mode: ConsoleOutputMode::Tty,
2225                 iommu: false,
2226                 socket: None,
2227             },
2228             #[cfg(target_arch = "x86_64")]
2229             debug_console: DebugConsoleConfig::default(),
2230             devices: None,
2231             user_devices: None,
2232             vdpa: None,
2233             vsock: None,
2234             #[cfg(feature = "pvmemcontrol")]
2235             pvmemcontrol: None,
2236             pvpanic: false,
2237             iommu: false,
2238             #[cfg(target_arch = "x86_64")]
2239             sgx_epc: None,
2240             numa: None,
2241             watchdog: false,
2242             #[cfg(feature = "guest_debug")]
2243             gdb: false,
2244             pci_segments: None,
2245             platform: None,
2246             tpm: None,
2247             preserved_fds: None,
2248             landlock_enable: false,
2249             landlock_rules: None,
2250         })
2251     }
2252 
2253     #[test]
2254     fn test_vmm_vm_create() {
2255         let mut vmm = create_dummy_vmm();
2256         let config = create_dummy_vm_config();
2257 
2258         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2259         assert!(matches!(
2260             vmm.vm_create(config),
2261             Err(VmError::VmAlreadyCreated)
2262         ));
2263     }
2264 
2265     #[test]
2266     fn test_vmm_vm_cold_add_device() {
2267         let mut vmm = create_dummy_vmm();
2268         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2269 
2270         assert!(matches!(
2271             vmm.vm_add_device(device_config.clone()),
2272             Err(VmError::VmNotCreated)
2273         ));
2274 
2275         let _ = vmm.vm_create(create_dummy_vm_config());
2276         assert!(vmm
2277             .vm_config
2278             .as_ref()
2279             .unwrap()
2280             .lock()
2281             .unwrap()
2282             .devices
2283             .is_none());
2284 
2285         let result = vmm.vm_add_device(device_config.clone());
2286         assert!(result.is_ok());
2287         assert!(result.unwrap().is_none());
2288         assert_eq!(
2289             vmm.vm_config
2290                 .as_ref()
2291                 .unwrap()
2292                 .lock()
2293                 .unwrap()
2294                 .devices
2295                 .clone()
2296                 .unwrap()
2297                 .len(),
2298             1
2299         );
2300         assert_eq!(
2301             vmm.vm_config
2302                 .as_ref()
2303                 .unwrap()
2304                 .lock()
2305                 .unwrap()
2306                 .devices
2307                 .clone()
2308                 .unwrap()[0],
2309             device_config
2310         );
2311     }
2312 
2313     #[test]
2314     fn test_vmm_vm_cold_add_user_device() {
2315         let mut vmm = create_dummy_vmm();
2316         let user_device_config =
2317             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2318 
2319         assert!(matches!(
2320             vmm.vm_add_user_device(user_device_config.clone()),
2321             Err(VmError::VmNotCreated)
2322         ));
2323 
2324         let _ = vmm.vm_create(create_dummy_vm_config());
2325         assert!(vmm
2326             .vm_config
2327             .as_ref()
2328             .unwrap()
2329             .lock()
2330             .unwrap()
2331             .user_devices
2332             .is_none());
2333 
2334         let result = vmm.vm_add_user_device(user_device_config.clone());
2335         assert!(result.is_ok());
2336         assert!(result.unwrap().is_none());
2337         assert_eq!(
2338             vmm.vm_config
2339                 .as_ref()
2340                 .unwrap()
2341                 .lock()
2342                 .unwrap()
2343                 .user_devices
2344                 .clone()
2345                 .unwrap()
2346                 .len(),
2347             1
2348         );
2349         assert_eq!(
2350             vmm.vm_config
2351                 .as_ref()
2352                 .unwrap()
2353                 .lock()
2354                 .unwrap()
2355                 .user_devices
2356                 .clone()
2357                 .unwrap()[0],
2358             user_device_config
2359         );
2360     }
2361 
2362     #[test]
2363     fn test_vmm_vm_cold_add_disk() {
2364         let mut vmm = create_dummy_vmm();
2365         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2366 
2367         assert!(matches!(
2368             vmm.vm_add_disk(disk_config.clone()),
2369             Err(VmError::VmNotCreated)
2370         ));
2371 
2372         let _ = vmm.vm_create(create_dummy_vm_config());
2373         assert!(vmm
2374             .vm_config
2375             .as_ref()
2376             .unwrap()
2377             .lock()
2378             .unwrap()
2379             .disks
2380             .is_none());
2381 
2382         let result = vmm.vm_add_disk(disk_config.clone());
2383         assert!(result.is_ok());
2384         assert!(result.unwrap().is_none());
2385         assert_eq!(
2386             vmm.vm_config
2387                 .as_ref()
2388                 .unwrap()
2389                 .lock()
2390                 .unwrap()
2391                 .disks
2392                 .clone()
2393                 .unwrap()
2394                 .len(),
2395             1
2396         );
2397         assert_eq!(
2398             vmm.vm_config
2399                 .as_ref()
2400                 .unwrap()
2401                 .lock()
2402                 .unwrap()
2403                 .disks
2404                 .clone()
2405                 .unwrap()[0],
2406             disk_config
2407         );
2408     }
2409 
2410     #[test]
2411     fn test_vmm_vm_cold_add_fs() {
2412         let mut vmm = create_dummy_vmm();
2413         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2414 
2415         assert!(matches!(
2416             vmm.vm_add_fs(fs_config.clone()),
2417             Err(VmError::VmNotCreated)
2418         ));
2419 
2420         let _ = vmm.vm_create(create_dummy_vm_config());
2421         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2422 
2423         let result = vmm.vm_add_fs(fs_config.clone());
2424         assert!(result.is_ok());
2425         assert!(result.unwrap().is_none());
2426         assert_eq!(
2427             vmm.vm_config
2428                 .as_ref()
2429                 .unwrap()
2430                 .lock()
2431                 .unwrap()
2432                 .fs
2433                 .clone()
2434                 .unwrap()
2435                 .len(),
2436             1
2437         );
2438         assert_eq!(
2439             vmm.vm_config
2440                 .as_ref()
2441                 .unwrap()
2442                 .lock()
2443                 .unwrap()
2444                 .fs
2445                 .clone()
2446                 .unwrap()[0],
2447             fs_config
2448         );
2449     }
2450 
2451     #[test]
2452     fn test_vmm_vm_cold_add_pmem() {
2453         let mut vmm = create_dummy_vmm();
2454         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2455 
2456         assert!(matches!(
2457             vmm.vm_add_pmem(pmem_config.clone()),
2458             Err(VmError::VmNotCreated)
2459         ));
2460 
2461         let _ = vmm.vm_create(create_dummy_vm_config());
2462         assert!(vmm
2463             .vm_config
2464             .as_ref()
2465             .unwrap()
2466             .lock()
2467             .unwrap()
2468             .pmem
2469             .is_none());
2470 
2471         let result = vmm.vm_add_pmem(pmem_config.clone());
2472         assert!(result.is_ok());
2473         assert!(result.unwrap().is_none());
2474         assert_eq!(
2475             vmm.vm_config
2476                 .as_ref()
2477                 .unwrap()
2478                 .lock()
2479                 .unwrap()
2480                 .pmem
2481                 .clone()
2482                 .unwrap()
2483                 .len(),
2484             1
2485         );
2486         assert_eq!(
2487             vmm.vm_config
2488                 .as_ref()
2489                 .unwrap()
2490                 .lock()
2491                 .unwrap()
2492                 .pmem
2493                 .clone()
2494                 .unwrap()[0],
2495             pmem_config
2496         );
2497     }
2498 
2499     #[test]
2500     fn test_vmm_vm_cold_add_net() {
2501         let mut vmm = create_dummy_vmm();
2502         let net_config = NetConfig::parse(
2503             "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
2504         )
2505         .unwrap();
2506 
2507         assert!(matches!(
2508             vmm.vm_add_net(net_config.clone()),
2509             Err(VmError::VmNotCreated)
2510         ));
2511 
2512         let _ = vmm.vm_create(create_dummy_vm_config());
2513         assert!(vmm
2514             .vm_config
2515             .as_ref()
2516             .unwrap()
2517             .lock()
2518             .unwrap()
2519             .net
2520             .is_none());
2521 
2522         let result = vmm.vm_add_net(net_config.clone());
2523         assert!(result.is_ok());
2524         assert!(result.unwrap().is_none());
2525         assert_eq!(
2526             vmm.vm_config
2527                 .as_ref()
2528                 .unwrap()
2529                 .lock()
2530                 .unwrap()
2531                 .net
2532                 .clone()
2533                 .unwrap()
2534                 .len(),
2535             1
2536         );
2537         assert_eq!(
2538             vmm.vm_config
2539                 .as_ref()
2540                 .unwrap()
2541                 .lock()
2542                 .unwrap()
2543                 .net
2544                 .clone()
2545                 .unwrap()[0],
2546             net_config
2547         );
2548     }
2549 
2550     #[test]
2551     fn test_vmm_vm_cold_add_vdpa() {
2552         let mut vmm = create_dummy_vmm();
2553         let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();
2554 
2555         assert!(matches!(
2556             vmm.vm_add_vdpa(vdpa_config.clone()),
2557             Err(VmError::VmNotCreated)
2558         ));
2559 
2560         let _ = vmm.vm_create(create_dummy_vm_config());
2561         assert!(vmm
2562             .vm_config
2563             .as_ref()
2564             .unwrap()
2565             .lock()
2566             .unwrap()
2567             .vdpa
2568             .is_none());
2569 
2570         let result = vmm.vm_add_vdpa(vdpa_config.clone());
2571         assert!(result.is_ok());
2572         assert!(result.unwrap().is_none());
2573         assert_eq!(
2574             vmm.vm_config
2575                 .as_ref()
2576                 .unwrap()
2577                 .lock()
2578                 .unwrap()
2579                 .vdpa
2580                 .clone()
2581                 .unwrap()
2582                 .len(),
2583             1
2584         );
2585         assert_eq!(
2586             vmm.vm_config
2587                 .as_ref()
2588                 .unwrap()
2589                 .lock()
2590                 .unwrap()
2591                 .vdpa
2592                 .clone()
2593                 .unwrap()[0],
2594             vdpa_config
2595         );
2596     }
2597 
2598     #[test]
2599     fn test_vmm_vm_cold_add_vsock() {
2600         let mut vmm = create_dummy_vmm();
2601         let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();
2602 
2603         assert!(matches!(
2604             vmm.vm_add_vsock(vsock_config.clone()),
2605             Err(VmError::VmNotCreated)
2606         ));
2607 
2608         let _ = vmm.vm_create(create_dummy_vm_config());
2609         assert!(vmm
2610             .vm_config
2611             .as_ref()
2612             .unwrap()
2613             .lock()
2614             .unwrap()
2615             .vsock
2616             .is_none());
2617 
2618         let result = vmm.vm_add_vsock(vsock_config.clone());
2619         assert!(result.is_ok());
2620         assert!(result.unwrap().is_none());
2621         assert_eq!(
2622             vmm.vm_config
2623                 .as_ref()
2624                 .unwrap()
2625                 .lock()
2626                 .unwrap()
2627                 .vsock
2628                 .clone()
2629                 .unwrap(),
2630             vsock_config
2631         );
2632     }
2633 }
2634