xref: /cloud-hypervisor/vmm/src/lib.rs (revision 894a4dee6ec35bdd9df848a3a88bf5b96771c943)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use std::collections::HashMap;
12 use std::fs::File;
13 use std::io::{stdout, Read, Write};
14 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
15 use std::os::unix::net::{UnixListener, UnixStream};
16 use std::panic::AssertUnwindSafe;
17 use std::path::PathBuf;
18 use std::rc::Rc;
19 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
20 use std::sync::{Arc, Mutex};
21 use std::time::Instant;
22 use std::{io, result, thread};
23 
24 use anyhow::anyhow;
25 #[cfg(feature = "dbus_api")]
26 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
27 use api::http::HttpApiHandle;
28 use console_devices::{pre_create_console_devices, ConsoleInfo};
29 use landlock::LandlockError;
30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
31 use memory_manager::MemoryManagerSnapshotData;
32 use pci::PciBdf;
33 use seccompiler::{apply_filter, SeccompAction};
34 use serde::ser::{SerializeStruct, Serializer};
35 use serde::{Deserialize, Serialize};
36 use signal_hook::iterator::{Handle, Signals};
37 use thiserror::Error;
38 use tracer::trace_scoped;
39 use vm_memory::bitmap::AtomicBitmap;
40 use vm_memory::{ReadVolatile, WriteVolatile};
41 use vm_migration::protocol::*;
42 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
43 use vmm_sys_util::eventfd::EventFd;
44 use vmm_sys_util::signal::unblock_signal;
45 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
46 
47 use crate::api::{
48     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
49     VmSendMigrationData, VmmPingResponse,
50 };
51 use crate::config::{add_to_config, RestoreConfig};
52 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
53 use crate::coredump::GuestDebuggable;
54 use crate::landlock::Landlock;
55 use crate::memory_manager::MemoryManager;
56 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
57 use crate::migration::get_vm_snapshot;
58 use crate::migration::{recv_vm_config, recv_vm_state};
59 use crate::seccomp_filters::{get_seccomp_filter, Thread};
60 use crate::vm::{Error as VmError, Vm, VmState};
61 use crate::vm_config::{
62     DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
63     VmConfig, VsockConfig,
64 };
65 
66 mod acpi;
67 pub mod api;
68 mod clone3;
69 pub mod config;
70 pub mod console_devices;
71 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
72 mod coredump;
73 pub mod cpu;
74 pub mod device_manager;
75 pub mod device_tree;
76 #[cfg(feature = "guest_debug")]
77 mod gdb;
78 #[cfg(feature = "igvm")]
79 mod igvm;
80 pub mod interrupt;
81 pub mod landlock;
82 pub mod memory_manager;
83 pub mod migration;
84 mod pci_segment;
85 pub mod seccomp_filters;
86 mod serial_manager;
87 mod sigwinch_listener;
88 pub mod vm;
89 pub mod vm_config;
90 
/// Guest memory map specialized with an `AtomicBitmap` (used for page-level
/// dirty tracking — see the migration paths such as `vm_maybe_send_dirty_pages`).
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
/// Single guest memory region, tracked with the same `AtomicBitmap` type.
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
93 
94 /// Errors associated with VMM management
95 #[derive(Debug, Error)]
96 pub enum Error {
97     /// API request receive error
98     #[error("Error receiving API request: {0}")]
99     ApiRequestRecv(#[source] RecvError),
100 
101     /// API response send error
102     #[error("Error sending API request: {0}")]
103     ApiResponseSend(#[source] SendError<ApiResponse>),
104 
105     /// Cannot bind to the UNIX domain socket path
106     #[error("Error binding to UNIX domain socket: {0}")]
107     Bind(#[source] io::Error),
108 
109     /// Cannot clone EventFd.
110     #[error("Error cloning EventFd: {0}")]
111     EventFdClone(#[source] io::Error),
112 
113     /// Cannot create EventFd.
114     #[error("Error creating EventFd: {0}")]
115     EventFdCreate(#[source] io::Error),
116 
117     /// Cannot read from EventFd.
118     #[error("Error reading from EventFd: {0}")]
119     EventFdRead(#[source] io::Error),
120 
121     /// Cannot create epoll context.
122     #[error("Error creating epoll context: {0}")]
123     Epoll(#[source] io::Error),
124 
125     /// Cannot create HTTP thread
126     #[error("Error spawning HTTP thread: {0}")]
127     HttpThreadSpawn(#[source] io::Error),
128 
129     /// Cannot create D-Bus thread
130     #[cfg(feature = "dbus_api")]
131     #[error("Error spawning D-Bus thread: {0}")]
132     DBusThreadSpawn(#[source] io::Error),
133 
134     /// Cannot start D-Bus session
135     #[cfg(feature = "dbus_api")]
136     #[error("Error starting D-Bus session: {0}")]
137     CreateDBusSession(#[source] zbus::Error),
138 
139     /// Cannot create `event-monitor` thread
140     #[error("Error spawning `event-monitor` thread: {0}")]
141     EventMonitorThreadSpawn(#[source] io::Error),
142 
143     /// Cannot handle the VM STDIN stream
144     #[error("Error handling VM stdin: {0:?}")]
145     Stdin(VmError),
146 
147     /// Cannot handle the VM pty stream
148     #[error("Error handling VM pty: {0:?}")]
149     Pty(VmError),
150 
151     /// Cannot reboot the VM
152     #[error("Error rebooting VM: {0:?}")]
153     VmReboot(VmError),
154 
155     /// Cannot create VMM thread
156     #[error("Error spawning VMM thread {0:?}")]
157     VmmThreadSpawn(#[source] io::Error),
158 
159     /// Cannot shut the VMM down
160     #[error("Error shutting down VMM: {0:?}")]
161     VmmShutdown(VmError),
162 
163     /// Cannot create seccomp filter
164     #[error("Error creating seccomp filter: {0}")]
165     CreateSeccompFilter(seccompiler::Error),
166 
167     /// Cannot apply seccomp filter
168     #[error("Error applying seccomp filter: {0}")]
169     ApplySeccompFilter(seccompiler::Error),
170 
171     /// Error activating virtio devices
172     #[error("Error activating virtio devices: {0:?}")]
173     ActivateVirtioDevices(VmError),
174 
175     /// Error creating API server
176     #[error("Error creating API server {0:?}")]
177     CreateApiServer(micro_http::ServerError),
178 
179     /// Error binding API server socket
180     #[error("Error creation API server's socket {0:?}")]
181     CreateApiServerSocket(#[source] io::Error),
182 
183     #[cfg(feature = "guest_debug")]
184     #[error("Failed to start the GDB thread: {0}")]
185     GdbThreadSpawn(io::Error),
186 
187     /// GDB request receive error
188     #[cfg(feature = "guest_debug")]
189     #[error("Error receiving GDB request: {0}")]
190     GdbRequestRecv(#[source] RecvError),
191 
192     /// GDB response send error
193     #[cfg(feature = "guest_debug")]
194     #[error("Error sending GDB request: {0}")]
195     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
196 
197     #[error("Cannot spawn a signal handler thread: {0}")]
198     SignalHandlerSpawn(#[source] io::Error),
199 
200     #[error("Failed to join on threads: {0:?}")]
201     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
202 
203     /// Cannot create Landlock object
204     #[error("Error creating landlock object: {0}")]
205     CreateLandlock(LandlockError),
206 
207     /// Cannot apply landlock based sandboxing
208     #[error("Error applying landlock: {0}")]
209     ApplyLandlock(LandlockError),
210 }
211 pub type Result<T> = result::Result<T, Error>;
212 
/// Tokens identifying the event sources multiplexed on the VMM epoll loop.
///
/// The numeric value of each variant is the `u64` payload registered with
/// epoll (see `EpollContext::add_event`), so the `From<u64>` conversion below
/// must stay in sync with the discriminants declared here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    Unknown,
}

impl From<u64> for EpollDispatch {
    /// Map a raw epoll token back to its dispatch variant; any value outside
    /// the known range becomes `Unknown`.
    fn from(v: u64) -> Self {
        use EpollDispatch::*;
        // Table indexed by discriminant value.
        const TABLE: [EpollDispatch; 5] = [Exit, Reset, Api, ActivateVirtioDevices, Debug];
        if v < TABLE.len() as u64 {
            TABLE[v as usize]
        } else {
            Unknown
        }
    }
}
237 
/// Thin wrapper around an epoll file descriptor.
///
/// The fd is held in a `File` so it is closed automatically on drop.
pub struct EpollContext {
    epoll_file: File,
}
241 
242 impl EpollContext {
243     pub fn new() -> result::Result<EpollContext, io::Error> {
244         let epoll_fd = epoll::create(true)?;
245         // Use 'File' to enforce closing on 'epoll_fd'
246         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
247         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
248 
249         Ok(EpollContext { epoll_file })
250     }
251 
252     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
253     where
254         T: AsRawFd,
255     {
256         let dispatch_index = token as u64;
257         epoll::ctl(
258             self.epoll_file.as_raw_fd(),
259             epoll::ControlOptions::EPOLL_CTL_ADD,
260             fd.as_raw_fd(),
261             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
262         )?;
263 
264         Ok(())
265     }
266 
267     #[cfg(fuzzing)]
268     pub fn add_event_custom<T>(
269         &mut self,
270         fd: &T,
271         id: u64,
272         evts: epoll::Events,
273     ) -> result::Result<(), io::Error>
274     where
275         T: AsRawFd,
276     {
277         epoll::ctl(
278             self.epoll_file.as_raw_fd(),
279             epoll::ControlOptions::EPOLL_CTL_ADD,
280             fd.as_raw_fd(),
281             epoll::Event::new(evts, id),
282         )?;
283 
284         Ok(())
285     }
286 }
287 
impl AsRawFd for EpollContext {
    fn as_raw_fd(&self) -> RawFd {
        // Expose the underlying epoll fd so callers can wait on it directly.
        self.epoll_file.as_raw_fd()
    }
}
293 
/// Identity of a PCI device as reported through the API: the device id plus
/// its bus/device/function (BDF) address.
pub struct PciDeviceInfo {
    pub id: String,
    pub bdf: PciBdf,
}
298 
299 impl Serialize for PciDeviceInfo {
300     fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
301     where
302         S: Serializer,
303     {
304         let bdf_str = self.bdf.to_string();
305 
306         // Serialize the structure.
307         let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
308         state.serialize_field("id", &self.id)?;
309         state.serialize_field("bdf", &bdf_str)?;
310         state.end()
311     }
312 }
313 
/// Return the names of the optional build features this binary was compiled
/// with, in alphabetical order. Features disabled at compile time are absent.
pub fn feature_list() -> Vec<String> {
    let mut features = Vec::new();

    #[cfg(feature = "dbus_api")]
    features.push("dbus_api".to_string());
    #[cfg(feature = "dhat-heap")]
    features.push("dhat-heap".to_string());
    #[cfg(feature = "guest_debug")]
    features.push("guest_debug".to_string());
    #[cfg(feature = "igvm")]
    features.push("igvm".to_string());
    #[cfg(feature = "io_uring")]
    features.push("io_uring".to_string());
    #[cfg(feature = "kvm")]
    features.push("kvm".to_string());
    #[cfg(feature = "mshv")]
    features.push("mshv".to_string());
    #[cfg(feature = "sev_snp")]
    features.push("sev_snp".to_string());
    #[cfg(feature = "tdx")]
    features.push("tdx".to_string());
    #[cfg(feature = "tracing")]
    features.push("tracing".to_string());

    features
}
338 
/// Spawn the `event-monitor` thread.
///
/// The thread drains events from `monitor.rx` until the channel closes,
/// appending each event's byte representation (plus a blank-line separator)
/// to the optional monitor file and forwarding it to every broadcast
/// subscriber.
///
/// Before entering the loop it applies the event-monitor seccomp filter and,
/// when `landlock_enable` is set, a Landlock sandbox. Any sandboxing failure
/// or a panic inside the loop writes `exit_event` so the rest of the VMM can
/// shut down.
pub fn start_event_monitor_thread(
    mut monitor: event_monitor::Monitor,
    seccomp_action: &SeccompAction,
    landlock_enable: bool,
    hypervisor_type: hypervisor::HypervisorType,
    exit_event: EventFd,
) -> Result<thread::JoinHandle<Result<()>>> {
    // Retrieve seccomp filter
    let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    thread::Builder::new()
        .name("event-monitor".to_owned())
        .spawn(move || {
            // Apply seccomp filter
            if !seccomp_filter.is_empty() {
                apply_filter(&seccomp_filter)
                    .map_err(Error::ApplySeccompFilter)
                    .map_err(|e| {
                        error!("Error applying seccomp filter: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }
            if landlock_enable {
                Landlock::new()
                    .map_err(Error::CreateLandlock)?
                    .restrict_self()
                    .map_err(Error::ApplyLandlock)
                    .map_err(|e| {
                        error!("Error applying landlock to event monitor thread: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }

            // catch_unwind so a panic in the loop is logged and turned into an
            // exit_event notification instead of a silent thread death.
            std::panic::catch_unwind(AssertUnwindSafe(move || {
                while let Ok(event) = monitor.rx.recv() {
                    let event = Arc::new(event);

                    if let Some(ref mut file) = monitor.file {
                        // Best effort: file write failures are ignored.
                        file.write_all(event.as_bytes().as_ref()).ok();
                        file.write_all(b"\n\n").ok();
                    }

                    // Fan out to subscribers; closed receivers are ignored.
                    for tx in monitor.broadcast.iter() {
                        tx.send(event.clone()).ok();
                    }
                }
            }))
            .map_err(|_| {
                error!("`event-monitor` thread panicked");
                exit_event.write(1).ok();
            })
            .ok();

            Ok(())
        })
        .map_err(Error::EventMonitorThreadSpawn)
}
399 
/// Start the VMM thread along with the API front-ends that feed it.
///
/// Spawns, in order:
/// - the "vmm" thread, which applies its seccomp filter, builds the [`Vmm`],
///   installs the signal handler and runs the control loop;
/// - a D-Bus API thread when `dbus_options` is provided (`dbus_api` feature);
/// - an HTTP API thread bound to `http_path` or, failing that, `http_fd`;
/// - a GDB stub thread when `debug_path` is provided (`guest_debug` feature).
///
/// Returns the handles needed to join the VMM thread and tear down the API
/// servers.
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    landlock_enable: bool,
) -> Result<VmmThreadHandle> {
    // Clone everything the GDB stub needs before the originals move into the
    // VMM thread closure.
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler(landlock_enable)?;

                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started, we can start the dbus thread
    // and start serving HTTP requests
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    // `http_path` takes precedence over `http_fd`; with neither, no HTTP
    // server is started.
    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else {
        None
    };

    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}
531 
/// Configuration bundle transferred from the migration source to the
/// destination before memory and device state are sent.
#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    /// Full configuration of the VM being migrated.
    vm_config: Arc<Mutex<VmConfig>>,
    /// CPUID data from the source, checked on the destination for CPU
    /// compatibility (see `vm_check_cpuid_compatibility`).
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    /// Snapshot data needed to reconstruct the destination `MemoryManager`.
    memory_manager_data: MemoryManagerSnapshotData,
}
539 
/// Version information describing the running VMM build.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    /// Build a `VmmVersionInfo` from borrowed strings, taking owned copies.
    pub fn new(build_version: &str, version: &str) -> Self {
        VmmVersionInfo {
            build_version: build_version.to_string(),
            version: version.to_string(),
        }
    }
}
554 
/// Handles returned by `start_vmm_thread`: the join handle for the VMM
/// control-loop thread plus what is needed to shut down the API front-ends.
pub struct VmmThreadHandle {
    /// Join handle of the "vmm" thread.
    pub thread_handle: thread::JoinHandle<Result<()>>,
    /// Channels used to signal the D-Bus API thread to shut down, if started.
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    /// Handle to the HTTP API thread, if one was started.
    pub http_api_handle: Option<HttpApiHandle>,
}
561 
/// State of the VMM control loop: the event sources it multiplexes, the
/// (optional) running VM and resources shared with auxiliary threads.
pub struct Vmm {
    /// Epoll context multiplexing the event fds below.
    epoll: EpollContext,
    /// Written to request VMM shutdown.
    exit_evt: EventFd,
    /// Written to request a VM reboot.
    reset_evt: EventFd,
    /// Written when an API request is pending.
    api_evt: EventFd,
    /// Written when a GDB request is pending.
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    /// Debug event fd handed to the VM (see `vm_receive_state`).
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    /// Build/version information of this VMM.
    version: VmmVersionInfo,
    /// The VM currently managed by this VMM, if any.
    vm: Option<Vm>,
    /// Configuration for the current (or to-be-created) VM, if any.
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    /// Written to request deferred virtio device activation.
    activate_evt: EventFd,
    /// Handle to the signal iterator, set by `setup_signal_handler`.
    signals: Option<Handle>,
    /// Auxiliary threads (e.g. the signal handler) to join on shutdown.
    threads: Vec<thread::JoinHandle<()>>,
    /// Original terminal settings, restored when the VMM exits.
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    /// Pipe used for console resize notifications, if any.
    console_resize_pipe: Option<Arc<File>>,
    /// Pre-created console device info, if any.
    console_info: Option<ConsoleInfo>,
}
583 
584 impl Vmm {
    /// Signals intercepted by the VMM to perform a clean shutdown (terminal
    /// restore + exit) instead of dying uncleanly.
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
586 
    /// Body of the "vmm_signal_handler" thread.
    ///
    /// Unblocks the handled signals for this thread, then waits on them
    /// forever. On SIGTERM/SIGINT it notifies the control loop through
    /// `exit_evt`; if that notification cannot be delivered, it restores the
    /// saved terminal settings itself and exits the process with status 1.
    fn signal_handler(
        mut signals: Signals,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
        exit_evt: &EventFd,
    ) {
        // Ensure the handled signals are deliverable to this thread.
        for sig in &Self::HANDLED_SIGNALS {
            unblock_signal(*sig).unwrap();
        }

        for signal in signals.forever() {
            match signal {
                SIGTERM | SIGINT => {
                    if exit_evt.write(1).is_err() {
                        // Resetting the terminal is usually done as the VMM exits
                        if let Ok(lock) = original_termios_opt.lock() {
                            if let Some(termios) = *lock {
                                // SAFETY: FFI call
                                let _ = unsafe {
                                    tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
                                };
                            }
                        } else {
                            warn!("Failed to lock original termios");
                        }

                        std::process::exit(1);
                    }
                }
                _ => (),
            }
        }
    }
619 
    /// Spawn the "vmm_signal_handler" thread and record its join handle in
    /// `self.threads`.
    ///
    /// The thread applies its dedicated seccomp filter and, when
    /// `landlock_enable` is set, a Landlock sandbox; any sandboxing failure
    /// (or a panic inside the handler) writes `self.exit_evt` so the control
    /// loop can shut down. A failure to create the `Signals` iterator is only
    /// logged — the function still returns `Ok(())` in that case.
    fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
        let signals = Signals::new(Self::HANDLED_SIGNALS);
        match signals {
            Ok(signals) => {
                // Keep the handle so the signal iterator can be closed later.
                self.signals = Some(signals.handle());
                let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
                let original_termios_opt = Arc::clone(&self.original_termios_opt);

                let signal_handler_seccomp_filter = get_seccomp_filter(
                    &self.seccomp_action,
                    Thread::SignalHandler,
                    self.hypervisor.hypervisor_type(),
                )
                .map_err(Error::CreateSeccompFilter)?;
                self.threads.push(
                    thread::Builder::new()
                        .name("vmm_signal_handler".to_string())
                        .spawn(move || {
                            if !signal_handler_seccomp_filter.is_empty() {
                                if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
                                    .map_err(Error::ApplySeccompFilter)
                                {
                                    error!("Error applying seccomp filter: {:?}", e);
                                    exit_evt.write(1).ok();
                                    return;
                                }
                            }
                            if landlock_enable{
                                match Landlock::new() {
                                    Ok(landlock) => {
                                        let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
                                            error!("Error applying Landlock to signal handler thread: {:?}", e);
                                            exit_evt.write(1).ok();
                                        });
                                    }
                                    Err(e) => {
                                        error!("Error creating Landlock object: {:?}", e);
                                        exit_evt.write(1).ok();
                                    }
                                };
                            }

                            // Catch panics so they are reported via exit_evt
                            // instead of silently killing the thread.
                            std::panic::catch_unwind(AssertUnwindSafe(|| {
                                Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
                            }))
                            .map_err(|_| {
                                error!("vmm signal_handler thread panicked");
                                exit_evt.write(1).ok()
                            })
                            .ok();
                        })
                        .map_err(Error::SignalHandlerSpawn)?,
                );
            }
            Err(e) => error!("Signal not found {}", e),
        }
        Ok(())
    }
678 
    /// Construct a `Vmm` with no VM loaded.
    ///
    /// Creates the reset and virtio-activate event fds and registers every
    /// control event fd (exit, reset, activate, API and — with `guest_debug`
    /// — debug) on the epoll context so the control loop can dispatch them.
    #[allow(clippy::too_many_arguments)]
    fn new(
        vmm_version: VmmVersionInfo,
        api_evt: EventFd,
        #[cfg(feature = "guest_debug")] debug_evt: EventFd,
        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
        seccomp_action: SeccompAction,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        exit_evt: EventFd,
    ) -> Result<Self> {
        let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
        let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
        let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;

        epoll
            .add_event(&exit_evt, EpollDispatch::Exit)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&reset_evt, EpollDispatch::Reset)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&api_evt, EpollDispatch::Api)
            .map_err(Error::Epoll)?;

        #[cfg(feature = "guest_debug")]
        epoll
            .add_event(&debug_evt, EpollDispatch::Debug)
            .map_err(Error::Epoll)?;

        Ok(Vmm {
            epoll,
            exit_evt,
            reset_evt,
            api_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            #[cfg(feature = "guest_debug")]
            vm_debug_evt,
            version: vmm_version,
            vm: None,
            vm_config: None,
            seccomp_action,
            hypervisor,
            activate_evt,
            signals: None,
            threads: vec![],
            original_termios_opt: Arc::new(Mutex::new(None)),
            console_resize_pipe: None,
            console_info: None,
        })
    }
736 
    /// Migration destination: handle the config phase of an incoming
    /// migration.
    ///
    /// Reads `req.length()` bytes from `socket` and deserializes them into a
    /// [`VmMigrationConfig`]; checks CPUID compatibility (KVM/x86_64),
    /// stores the VM config, pre-creates the console devices, applies
    /// Landlock when the config requests it, then creates the hypervisor VM
    /// and a `MemoryManager` restored from the transferred memory-manager
    /// data (reusing `existing_memory_files` when provided). On success, an
    /// OK response is written back to the source.
    fn vm_receive_config<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        existing_memory_files: Option<HashMap<u32, File>>,
    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
    where
        T: Read + Write,
    {
        // Read in config data along with memory manager data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;

        let vm_migration_config: VmMigrationConfig =
            serde_json::from_slice(&data).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
            })?;

        // Refuse the migration if the destination CPUs cannot express the
        // CPUID the source sent.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(
            &vm_migration_config.vm_config,
            &vm_migration_config.common_cpuid,
        )?;

        let config = vm_migration_config.vm_config.clone();
        self.vm_config = Some(vm_migration_config.vm_config);
        self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
        })?);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
            })?;
        }

        let vm = Vm::create_hypervisor_vm(
            &self.hypervisor,
            #[cfg(feature = "tdx")]
            false,
            #[cfg(feature = "sev_snp")]
            false,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating hypervisor VM from snapshot: {:?}",
                e
            ))
        })?;

        let phys_bits =
            vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);

        let memory_manager = MemoryManager::new(
            vm,
            &config.lock().unwrap().memory.clone(),
            None,
            phys_bits,
            #[cfg(feature = "tdx")]
            false,
            Some(&vm_migration_config.memory_manager_data),
            existing_memory_files,
            #[cfg(target_arch = "x86_64")]
            None,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating MemoryManager from snapshot: {:?}",
                e
            ))
        })?;

        // Acknowledge the config so the source can proceed with the
        // following migration phases.
        Response::ok().write_to(socket)?;

        Ok(memory_manager)
    }
823 
    /// Migration destination: handle the state phase of an incoming
    /// migration.
    ///
    /// Reads `req.length()` bytes from `socket` and deserializes them into a
    /// [`Snapshot`], then builds a new [`Vm`] on top of the already-restored
    /// `MemoryManager` and restores it from that snapshot, storing it as the
    /// current VM. An error response is written to the socket if the restore
    /// fails, an OK response otherwise.
    fn vm_receive_state<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        mm: Arc<Mutex<MemoryManager>>,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + Write,
    {
        // Read in state data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;
        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
        })?;

        // The new Vm gets its own clones of the control event fds.
        let exit_evt = self.exit_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
        })?;
        let reset_evt = self.reset_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
        })?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
        })?;
        let activate_evt = self.activate_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
        })?;

        let timestamp = Instant::now();
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let mut vm = Vm::new_from_memory_manager(
            self.vm_config.clone().unwrap(),
            mm,
            hypervisor_vm,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            timestamp,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
        })?;

        // Create VM
        vm.restore().map_err(|e| {
            // Tell the source the restore failed before propagating the error.
            Response::error().write_to(socket).ok();
            MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
        })?;
        self.vm = Some(vm);

        Response::ok().write_to(socket)?;

        Ok(())
    }
891 
892     fn vm_receive_memory<T>(
893         &mut self,
894         req: &Request,
895         socket: &mut T,
896         memory_manager: &mut MemoryManager,
897     ) -> std::result::Result<(), MigratableError>
898     where
899         T: Read + ReadVolatile + Write,
900     {
901         // Read table
902         let table = MemoryRangeTable::read_from(socket, req.length())?;
903 
904         // And then read the memory itself
905         memory_manager
906             .receive_memory_regions(&table, socket)
907             .inspect_err(|_| {
908                 Response::error().write_to(socket).ok();
909             })?;
910         Response::ok().write_to(socket)?;
911         Ok(())
912     }
913 
914     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
915         url.strip_prefix("unix:")
916             .ok_or_else(|| {
917                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
918             })
919             .map(|s| s.into())
920     }
921 
922     // Returns true if there were dirty pages to send
923     fn vm_maybe_send_dirty_pages<T>(
924         vm: &mut Vm,
925         socket: &mut T,
926     ) -> result::Result<bool, MigratableError>
927     where
928         T: Read + Write + WriteVolatile,
929     {
930         // Send (dirty) memory table
931         let table = vm.dirty_log()?;
932 
933         // But if there are no regions go straight to pause
934         if table.regions().is_empty() {
935             return Ok(false);
936         }
937 
938         Request::memory(table.length()).write_to(socket).unwrap();
939         table.write_to(socket)?;
940         // And then the memory itself
941         vm.send_memory_regions(&table, socket)?;
942         Response::read_from(socket)?.ok_or_abandon(
943             socket,
944             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
945         )?;
946 
947         Ok(true)
948     }
949 
950     fn send_migration(
951         vm: &mut Vm,
952         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
953             dyn hypervisor::Hypervisor,
954         >,
955         send_data_migration: VmSendMigrationData,
956     ) -> result::Result<(), MigratableError> {
957         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
958         let mut socket = UnixStream::connect(path).map_err(|e| {
959             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
960         })?;
961 
962         // Start the migration
963         Request::start().write_to(&mut socket)?;
964         Response::read_from(&mut socket)?.ok_or_abandon(
965             &mut socket,
966             MigratableError::MigrateSend(anyhow!("Error starting migration")),
967         )?;
968 
969         // Send config
970         let vm_config = vm.get_config();
971         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
972         let common_cpuid = {
973             #[cfg(feature = "tdx")]
974             if vm_config.lock().unwrap().is_tdx_enabled() {
975                 return Err(MigratableError::MigrateSend(anyhow!(
976                     "Live Migration is not supported when TDX is enabled"
977                 )));
978             };
979 
980             let amx = vm_config.lock().unwrap().cpus.features.amx;
981             let phys_bits =
982                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
983             arch::generate_common_cpuid(
984                 &hypervisor,
985                 &arch::CpuidConfig {
986                     sgx_epc_sections: None,
987                     phys_bits,
988                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
989                     #[cfg(feature = "tdx")]
990                     tdx: false,
991                     amx,
992                 },
993             )
994             .map_err(|e| {
995                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
996             })?
997         };
998 
999         if send_data_migration.local {
1000             vm.send_memory_fds(&mut socket)?;
1001         }
1002 
1003         let vm_migration_config = VmMigrationConfig {
1004             vm_config,
1005             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1006             common_cpuid,
1007             memory_manager_data: vm.memory_manager_data(),
1008         };
1009         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1010         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1011         socket
1012             .write_all(&config_data)
1013             .map_err(MigratableError::MigrateSocket)?;
1014         Response::read_from(&mut socket)?.ok_or_abandon(
1015             &mut socket,
1016             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1017         )?;
1018 
1019         // Let every Migratable object know about the migration being started.
1020         vm.start_migration()?;
1021 
1022         if send_data_migration.local {
1023             // Now pause VM
1024             vm.pause()?;
1025         } else {
1026             // Start logging dirty pages
1027             vm.start_dirty_log()?;
1028 
1029             // Send memory table
1030             let table = vm.memory_range_table()?;
1031             Request::memory(table.length())
1032                 .write_to(&mut socket)
1033                 .unwrap();
1034             table.write_to(&mut socket)?;
1035             // And then the memory itself
1036             vm.send_memory_regions(&table, &mut socket)?;
1037             Response::read_from(&mut socket)?.ok_or_abandon(
1038                 &mut socket,
1039                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1040             )?;
1041 
1042             // Try at most 5 passes of dirty memory sending
1043             const MAX_DIRTY_MIGRATIONS: usize = 5;
1044             for i in 0..MAX_DIRTY_MIGRATIONS {
1045                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1046                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1047                     break;
1048                 }
1049             }
1050 
1051             // Now pause VM
1052             vm.pause()?;
1053 
1054             // Send last batch of dirty pages
1055             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1056 
1057             // Stop logging dirty pages
1058             vm.stop_dirty_log()?;
1059         }
1060         // Capture snapshot and send it
1061         let vm_snapshot = vm.snapshot()?;
1062         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1063         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1064         socket
1065             .write_all(&snapshot_data)
1066             .map_err(MigratableError::MigrateSocket)?;
1067         Response::read_from(&mut socket)?.ok_or_abandon(
1068             &mut socket,
1069             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1070         )?;
1071         // Complete the migration
1072         Request::complete().write_to(&mut socket)?;
1073         Response::read_from(&mut socket)?.ok_or_abandon(
1074             &mut socket,
1075             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1076         )?;
1077 
1078         info!("Migration complete");
1079 
1080         // Let every Migratable object know about the migration being complete
1081         vm.complete_migration()
1082     }
1083 
1084     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1085     fn vm_check_cpuid_compatibility(
1086         &self,
1087         src_vm_config: &Arc<Mutex<VmConfig>>,
1088         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1089     ) -> result::Result<(), MigratableError> {
1090         #[cfg(feature = "tdx")]
1091         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1092             return Err(MigratableError::MigrateReceive(anyhow!(
1093                 "Live Migration is not supported when TDX is enabled"
1094             )));
1095         };
1096 
1097         // We check the `CPUID` compatibility of between the source vm and destination, which is
1098         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1099         let dest_cpuid = &{
1100             let vm_config = &src_vm_config.lock().unwrap();
1101 
1102             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1103             arch::generate_common_cpuid(
1104                 &self.hypervisor.clone(),
1105                 &arch::CpuidConfig {
1106                     sgx_epc_sections: None,
1107                     phys_bits,
1108                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1109                     #[cfg(feature = "tdx")]
1110                     tdx: false,
1111                     amx: vm_config.cpus.features.amx,
1112                 },
1113             )
1114             .map_err(|e| {
1115                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1116             })?
1117         };
1118         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1119             MigratableError::MigrateReceive(anyhow!(
1120                 "Error checking cpu feature compatibility': {:?}",
1121                 e
1122             ))
1123         })
1124     }
1125 
    /// Main VMM event loop.
    ///
    /// Waits on the epoll fd and dispatches events — VM exit/reset, pending
    /// virtio device activation, API requests and (with `guest_debug`) gdb
    /// requests — until a shutdown is requested. On exit it closes the
    /// signal-handler thread and joins all worker threads.
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        // Maximum number of events drained per epoll_wait() call.
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            // Block indefinitely (-1) until at least one event arrives.
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        if let Some(ref vm) = self.vm {
                            // The eventfd counter tells how many activation
                            // requests were batched up.
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            // A handler returning true asks the control loop
                            // to terminate.
                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
1235 }
1236 
/// Apply the Landlock rules described by the VM configuration.
fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
    vm_config.lock().unwrap().apply_landlock()?;
    Ok(())
}
1241 
1242 impl RequestHandler for Vmm {
1243     fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> {
1244         // We only store the passed VM config.
1245         // The VM will be created when being asked to boot it.
1246         if self.vm_config.is_none() {
1247             self.vm_config = Some(Arc::new(Mutex::new(*config)));
1248             self.console_info =
1249                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1250 
1251             if self
1252                 .vm_config
1253                 .as_ref()
1254                 .unwrap()
1255                 .lock()
1256                 .unwrap()
1257                 .landlock_enable
1258             {
1259                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1260                     .map_err(VmError::ApplyLandlock)?;
1261             }
1262             Ok(())
1263         } else {
1264             Err(VmError::VmAlreadyCreated)
1265         }
1266     }
1267 
1268     fn vm_boot(&mut self) -> result::Result<(), VmError> {
1269         tracer::start();
1270         info!("Booting VM");
1271         event!("vm", "booting");
1272         let r = {
1273             trace_scoped!("vm_boot");
1274             // If we don't have a config, we cannot boot a VM.
1275             if self.vm_config.is_none() {
1276                 return Err(VmError::VmMissingConfig);
1277             };
1278 
1279             // console_info is set to None in vm_shutdown. re-populate here if empty
1280             if self.console_info.is_none() {
1281                 self.console_info =
1282                     Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1283             }
1284 
1285             // Create a new VM if we don't have one yet.
1286             if self.vm.is_none() {
1287                 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1288                 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1289                 #[cfg(feature = "guest_debug")]
1290                 let vm_debug_evt = self
1291                     .vm_debug_evt
1292                     .try_clone()
1293                     .map_err(VmError::EventFdClone)?;
1294                 let activate_evt = self
1295                     .activate_evt
1296                     .try_clone()
1297                     .map_err(VmError::EventFdClone)?;
1298 
1299                 if let Some(ref vm_config) = self.vm_config {
1300                     let vm = Vm::new(
1301                         Arc::clone(vm_config),
1302                         exit_evt,
1303                         reset_evt,
1304                         #[cfg(feature = "guest_debug")]
1305                         vm_debug_evt,
1306                         &self.seccomp_action,
1307                         self.hypervisor.clone(),
1308                         activate_evt,
1309                         self.console_info.clone(),
1310                         self.console_resize_pipe.clone(),
1311                         Arc::clone(&self.original_termios_opt),
1312                         None,
1313                         None,
1314                         None,
1315                     )?;
1316 
1317                     self.vm = Some(vm);
1318                 }
1319             }
1320 
1321             // Now we can boot the VM.
1322             if let Some(ref mut vm) = self.vm {
1323                 vm.boot()
1324             } else {
1325                 Err(VmError::VmNotCreated)
1326             }
1327         };
1328         tracer::end();
1329         if r.is_ok() {
1330             event!("vm", "booted");
1331         }
1332         r
1333     }
1334 
1335     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1336         if let Some(ref mut vm) = self.vm {
1337             vm.pause().map_err(VmError::Pause)
1338         } else {
1339             Err(VmError::VmNotRunning)
1340         }
1341     }
1342 
1343     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1344         if let Some(ref mut vm) = self.vm {
1345             vm.resume().map_err(VmError::Resume)
1346         } else {
1347             Err(VmError::VmNotRunning)
1348         }
1349     }
1350 
1351     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1352         if let Some(ref mut vm) = self.vm {
1353             // Drain console_info so that FDs are not reused
1354             let _ = self.console_info.take();
1355             vm.snapshot()
1356                 .map_err(VmError::Snapshot)
1357                 .and_then(|snapshot| {
1358                     vm.send(&snapshot, destination_url)
1359                         .map_err(VmError::SnapshotSend)
1360                 })
1361         } else {
1362             Err(VmError::VmNotRunning)
1363         }
1364     }
1365 
    /// Restore a VM from a snapshot located at `restore_cfg.source_url`.
    ///
    /// Fails if a VM or VM config already exists. Reads the saved config and
    /// state, validates them (including CPUID compatibility on x86_64/KVM),
    /// re-creates the console devices and the VM instance, then restores the
    /// device state.
    fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
        if self.vm.is_some() || self.vm_config.is_some() {
            return Err(VmError::VmAlreadyCreated);
        }

        let source_url = restore_cfg.source_url.as_path().to_str();
        if source_url.is_none() {
            return Err(VmError::InvalidRestoreSourceUrl);
        }
        // Safe to unwrap as we checked it was Some(&str).
        let source_url = source_url.unwrap();

        let vm_config = Arc::new(Mutex::new(
            recv_vm_config(source_url).map_err(VmError::Restore)?,
        ));
        restore_cfg
            .validate(&vm_config.lock().unwrap().clone())
            .map_err(VmError::ConfigValidation)?;

        // Update VM's net configurations with new fds received for restore operation
        if let (Some(restored_nets), Some(vm_net_configs)) =
            (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
        {
            for net in restored_nets.iter() {
                for net_config in vm_net_configs.iter_mut() {
                    // update only if the net dev is backed by FDs
                    if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
                        net_config.fds.clone_from(&net.fds);
                    }
                }
            }
        }

        let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;

        // Refuse to restore if this host cannot reproduce the CPUID the
        // guest saw when the snapshot was taken.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
            .map_err(VmError::Restore)?;

        self.vm_config = Some(Arc::clone(&vm_config));

        // console_info is set to None in vm_snapshot. re-populate here if empty
        if self.console_info.is_none() {
            self.console_info =
                Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
        }

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        // Passing the snapshot, source URL and prefault option makes Vm::new
        // restore memory/state instead of cold-booting.
        let vm = Vm::new(
            vm_config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
            Some(source_url),
            Some(restore_cfg.prefault),
        )?;
        self.vm = Some(vm);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone())
                .map_err(VmError::ApplyLandlock)?;
        }

        // Now we can restore the rest of the VM.
        if let Some(ref mut vm) = self.vm {
            vm.restore()
        } else {
            Err(VmError::VmNotCreated)
        }
    }
1464 
1465     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1466     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1467         if let Some(ref mut vm) = self.vm {
1468             vm.coredump(destination_url).map_err(VmError::Coredump)
1469         } else {
1470             Err(VmError::VmNotRunning)
1471         }
1472     }
1473 
1474     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1475         let r = if let Some(ref mut vm) = self.vm.take() {
1476             // Drain console_info so that the FDs are not reused
1477             let _ = self.console_info.take();
1478             vm.shutdown()
1479         } else {
1480             Err(VmError::VmNotRunning)
1481         };
1482 
1483         if r.is_ok() {
1484             event!("vm", "shutdown");
1485         }
1486 
1487         r
1488     }
1489 
    /// Reboot the VM: shut down the current instance, then create and boot a
    /// fresh one from the same configuration.
    fn vm_reboot(&mut self) -> result::Result<(), VmError> {
        event!("vm", "rebooting");

        // First we stop the current VM
        let config = if let Some(mut vm) = self.vm.take() {
            let config = vm.get_config();
            vm.shutdown()?;
            config
        } else {
            return Err(VmError::VmNotCreated);
        };

        // vm.shutdown() closes all the console devices, so set console_info to None
        // so that the closed FD #s are not reused.
        let _ = self.console_info.take();

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
        // an event sitting in the shared reset_evt. Without doing this we get very early reboots
        // during the boot process.
        if self.reset_evt.read().is_ok() {
            warn!("Spurious second reset event received. Ignoring.");
        }

        // Re-create the console devices before building the new VM instance.
        self.console_info =
            Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);

        // Then we create the new VM
        let mut vm = Vm::new(
            config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            None,
            None,
            None,
        )?;

        // And we boot it
        vm.boot()?;

        self.vm = Some(vm);

        event!("vm", "rebooted");

        Ok(())
    }
1555 
1556     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1557         match &self.vm_config {
1558             Some(vm_config) => {
1559                 let state = match &self.vm {
1560                     Some(vm) => vm.get_state()?,
1561                     None => VmState::Created,
1562                 };
1563                 let config = vm_config.lock().unwrap().clone();
1564 
1565                 let mut memory_actual_size = config.memory.total_size();
1566                 if let Some(vm) = &self.vm {
1567                     memory_actual_size -= vm.balloon_size();
1568                 }
1569 
1570                 let device_tree = self
1571                     .vm
1572                     .as_ref()
1573                     .map(|vm| vm.device_tree().lock().unwrap().clone());
1574 
1575                 Ok(VmInfoResponse {
1576                     config: Box::new(config),
1577                     state,
1578                     memory_actual_size,
1579                     device_tree,
1580                 })
1581             }
1582             None => Err(VmError::VmNotCreated),
1583         }
1584     }
1585 
1586     fn vmm_ping(&self) -> VmmPingResponse {
1587         let VmmVersionInfo {
1588             build_version,
1589             version,
1590         } = self.version.clone();
1591 
1592         VmmPingResponse {
1593             build_version,
1594             version,
1595             pid: std::process::id() as i64,
1596             features: feature_list(),
1597         }
1598     }
1599 
1600     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1601         if self.vm_config.is_none() {
1602             return Ok(());
1603         }
1604 
1605         // If a VM is booted, we first try to shut it down.
1606         if self.vm.is_some() {
1607             self.vm_shutdown()?;
1608         }
1609 
1610         self.vm_config = None;
1611 
1612         event!("vm", "deleted");
1613 
1614         Ok(())
1615     }
1616 
1617     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1618         self.vm_delete()?;
1619         event!("vmm", "shutdown");
1620         Ok(())
1621     }
1622 
1623     fn vm_resize(
1624         &mut self,
1625         desired_vcpus: Option<u8>,
1626         desired_ram: Option<u64>,
1627         desired_balloon: Option<u64>,
1628     ) -> result::Result<(), VmError> {
1629         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1630 
1631         if let Some(ref mut vm) = self.vm {
1632             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1633                 error!("Error when resizing VM: {:?}", e);
1634                 Err(e)
1635             } else {
1636                 Ok(())
1637             }
1638         } else {
1639             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1640             if let Some(desired_vcpus) = desired_vcpus {
1641                 config.cpus.boot_vcpus = desired_vcpus;
1642             }
1643             if let Some(desired_ram) = desired_ram {
1644                 config.memory.size = desired_ram;
1645             }
1646             if let Some(desired_balloon) = desired_balloon {
1647                 if let Some(balloon_config) = &mut config.balloon {
1648                     balloon_config.size = desired_balloon;
1649                 }
1650             }
1651             Ok(())
1652         }
1653     }
1654 
1655     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1656         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1657 
1658         if let Some(ref mut vm) = self.vm {
1659             if let Err(e) = vm.resize_zone(id, desired_ram) {
1660                 error!("Error when resizing VM: {:?}", e);
1661                 Err(e)
1662             } else {
1663                 Ok(())
1664             }
1665         } else {
1666             // Update VmConfig by setting the new desired ram.
1667             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1668 
1669             if let Some(zones) = &mut memory_config.zones {
1670                 for zone in zones.iter_mut() {
1671                     if zone.id == id {
1672                         zone.size = desired_ram;
1673                         return Ok(());
1674                     }
1675                 }
1676             }
1677 
1678             error!("Could not find the memory zone {} for the resize", id);
1679             Err(VmError::ResizeZone)
1680         }
1681     }
1682 
1683     fn vm_add_device(
1684         &mut self,
1685         device_cfg: DeviceConfig,
1686     ) -> result::Result<Option<Vec<u8>>, VmError> {
1687         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1688 
1689         {
1690             // Validate the configuration change in a cloned configuration
1691             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1692             add_to_config(&mut config.devices, device_cfg.clone());
1693             config.validate().map_err(VmError::ConfigValidation)?;
1694         }
1695 
1696         if let Some(ref mut vm) = self.vm {
1697             let info = vm.add_device(device_cfg).map_err(|e| {
1698                 error!("Error when adding new device to the VM: {:?}", e);
1699                 e
1700             })?;
1701             serde_json::to_vec(&info)
1702                 .map(Some)
1703                 .map_err(VmError::SerializeJson)
1704         } else {
1705             // Update VmConfig by adding the new device.
1706             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1707             add_to_config(&mut config.devices, device_cfg);
1708             Ok(None)
1709         }
1710     }
1711 
1712     fn vm_add_user_device(
1713         &mut self,
1714         device_cfg: UserDeviceConfig,
1715     ) -> result::Result<Option<Vec<u8>>, VmError> {
1716         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1717 
1718         {
1719             // Validate the configuration change in a cloned configuration
1720             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1721             add_to_config(&mut config.user_devices, device_cfg.clone());
1722             config.validate().map_err(VmError::ConfigValidation)?;
1723         }
1724 
1725         if let Some(ref mut vm) = self.vm {
1726             let info = vm.add_user_device(device_cfg).map_err(|e| {
1727                 error!("Error when adding new user device to the VM: {:?}", e);
1728                 e
1729             })?;
1730             serde_json::to_vec(&info)
1731                 .map(Some)
1732                 .map_err(VmError::SerializeJson)
1733         } else {
1734             // Update VmConfig by adding the new device.
1735             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1736             add_to_config(&mut config.user_devices, device_cfg);
1737             Ok(None)
1738         }
1739     }
1740 
1741     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1742         if let Some(ref mut vm) = self.vm {
1743             if let Err(e) = vm.remove_device(id) {
1744                 error!("Error when removing device from the VM: {:?}", e);
1745                 Err(e)
1746             } else {
1747                 Ok(())
1748             }
1749         } else if let Some(ref config) = self.vm_config {
1750             let mut config = config.lock().unwrap();
1751             if config.remove_device(&id) {
1752                 Ok(())
1753             } else {
1754                 Err(VmError::NoDeviceToRemove(id))
1755             }
1756         } else {
1757             Err(VmError::VmNotCreated)
1758         }
1759     }
1760 
1761     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1762         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1763 
1764         {
1765             // Validate the configuration change in a cloned configuration
1766             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1767             add_to_config(&mut config.disks, disk_cfg.clone());
1768             config.validate().map_err(VmError::ConfigValidation)?;
1769         }
1770 
1771         if let Some(ref mut vm) = self.vm {
1772             let info = vm.add_disk(disk_cfg).map_err(|e| {
1773                 error!("Error when adding new disk to the VM: {:?}", e);
1774                 e
1775             })?;
1776             serde_json::to_vec(&info)
1777                 .map(Some)
1778                 .map_err(VmError::SerializeJson)
1779         } else {
1780             // Update VmConfig by adding the new device.
1781             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1782             add_to_config(&mut config.disks, disk_cfg);
1783             Ok(None)
1784         }
1785     }
1786 
1787     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1788         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1789 
1790         {
1791             // Validate the configuration change in a cloned configuration
1792             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1793             add_to_config(&mut config.fs, fs_cfg.clone());
1794             config.validate().map_err(VmError::ConfigValidation)?;
1795         }
1796 
1797         if let Some(ref mut vm) = self.vm {
1798             let info = vm.add_fs(fs_cfg).map_err(|e| {
1799                 error!("Error when adding new fs to the VM: {:?}", e);
1800                 e
1801             })?;
1802             serde_json::to_vec(&info)
1803                 .map(Some)
1804                 .map_err(VmError::SerializeJson)
1805         } else {
1806             // Update VmConfig by adding the new device.
1807             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1808             add_to_config(&mut config.fs, fs_cfg);
1809             Ok(None)
1810         }
1811     }
1812 
1813     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1814         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1815 
1816         {
1817             // Validate the configuration change in a cloned configuration
1818             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1819             add_to_config(&mut config.pmem, pmem_cfg.clone());
1820             config.validate().map_err(VmError::ConfigValidation)?;
1821         }
1822 
1823         if let Some(ref mut vm) = self.vm {
1824             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1825                 error!("Error when adding new pmem device to the VM: {:?}", e);
1826                 e
1827             })?;
1828             serde_json::to_vec(&info)
1829                 .map(Some)
1830                 .map_err(VmError::SerializeJson)
1831         } else {
1832             // Update VmConfig by adding the new device.
1833             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1834             add_to_config(&mut config.pmem, pmem_cfg);
1835             Ok(None)
1836         }
1837     }
1838 
1839     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1840         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1841 
1842         {
1843             // Validate the configuration change in a cloned configuration
1844             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1845             add_to_config(&mut config.net, net_cfg.clone());
1846             config.validate().map_err(VmError::ConfigValidation)?;
1847         }
1848 
1849         if let Some(ref mut vm) = self.vm {
1850             let info = vm.add_net(net_cfg).map_err(|e| {
1851                 error!("Error when adding new network device to the VM: {:?}", e);
1852                 e
1853             })?;
1854             serde_json::to_vec(&info)
1855                 .map(Some)
1856                 .map_err(VmError::SerializeJson)
1857         } else {
1858             // Update VmConfig by adding the new device.
1859             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1860             add_to_config(&mut config.net, net_cfg);
1861             Ok(None)
1862         }
1863     }
1864 
1865     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1866         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1867 
1868         {
1869             // Validate the configuration change in a cloned configuration
1870             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1871             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1872             config.validate().map_err(VmError::ConfigValidation)?;
1873         }
1874 
1875         if let Some(ref mut vm) = self.vm {
1876             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1877                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1878                 e
1879             })?;
1880             serde_json::to_vec(&info)
1881                 .map(Some)
1882                 .map_err(VmError::SerializeJson)
1883         } else {
1884             // Update VmConfig by adding the new device.
1885             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1886             add_to_config(&mut config.vdpa, vdpa_cfg);
1887             Ok(None)
1888         }
1889     }
1890 
1891     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1892         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1893 
1894         {
1895             // Validate the configuration change in a cloned configuration
1896             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1897 
1898             if config.vsock.is_some() {
1899                 return Err(VmError::TooManyVsockDevices);
1900             }
1901 
1902             config.vsock = Some(vsock_cfg.clone());
1903             config.validate().map_err(VmError::ConfigValidation)?;
1904         }
1905 
1906         if let Some(ref mut vm) = self.vm {
1907             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1908                 error!("Error when adding new vsock device to the VM: {:?}", e);
1909                 e
1910             })?;
1911             serde_json::to_vec(&info)
1912                 .map(Some)
1913                 .map_err(VmError::SerializeJson)
1914         } else {
1915             // Update VmConfig by adding the new device.
1916             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1917             config.vsock = Some(vsock_cfg);
1918             Ok(None)
1919         }
1920     }
1921 
1922     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1923         if let Some(ref mut vm) = self.vm {
1924             let info = vm.counters().map_err(|e| {
1925                 error!("Error when getting counters from the VM: {:?}", e);
1926                 e
1927             })?;
1928             serde_json::to_vec(&info)
1929                 .map(Some)
1930                 .map_err(VmError::SerializeJson)
1931         } else {
1932             Err(VmError::VmNotRunning)
1933         }
1934     }
1935 
1936     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1937         if let Some(ref mut vm) = self.vm {
1938             vm.power_button()
1939         } else {
1940             Err(VmError::VmNotRunning)
1941         }
1942     }
1943 
1944     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1945         if let Some(ref mut vm) = self.vm {
1946             vm.nmi()
1947         } else {
1948             Err(VmError::VmNotRunning)
1949         }
1950     }
1951 
    /// Act as the destination side of a VM migration.
    ///
    /// Binds a UNIX socket at the path encoded in
    /// `receive_data_migration.receiver_url`, accepts exactly one connection
    /// from the source VMM, then processes migration commands until either
    /// `Complete` or `Abandon` ends the session.
    ///
    /// # Errors
    ///
    /// Returns `MigratableError::MigrateReceive` on socket or protocol
    /// errors; errors from resuming the received VM are propagated as-is.
    fn vm_receive_migration(
        &mut self,
        receive_data_migration: VmReceiveMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Receiving migration: receiver_url = {}",
            receive_data_migration.receiver_url
        );

        let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
        let listener = UnixListener::bind(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
        })?;
        let (mut socket, _addr) = listener.accept().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
        })?;
        // The socket file is no longer needed once the connection is
        // established, so unlink it right away.
        std::fs::remove_file(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
        })?;

        // Protocol state: `started` is set by Start and gates every other
        // command; `memory_manager` is produced by Config and consumed by
        // State; `existing_memory_files` collects fds sent via MemoryFd and
        // is handed to Config.
        let mut started = false;
        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
        let mut existing_memory_files = None;
        loop {
            let req = Request::read_from(&mut socket)?;
            match req.command() {
                Command::Invalid => info!("Invalid Command Received"),
                Command::Start => {
                    info!("Start Command Received");
                    started = true;

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Config => {
                    info!("Config Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    // `take()` hands over any memory fds received so far.
                    memory_manager = Some(self.vm_receive_config(
                        &req,
                        &mut socket,
                        existing_memory_files.take(),
                    )?);
                }
                Command::State => {
                    info!("State Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    if let Some(mm) = memory_manager.take() {
                        self.vm_receive_state(&req, &mut socket, mm)?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::Memory => {
                    info!("Memory Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    if let Some(mm) = memory_manager.as_ref() {
                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::MemoryFd => {
                    info!("MemoryFd Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }

                    // A 4-byte little-endian memory slot id accompanies the
                    // passed file descriptor.
                    let mut buf = [0u8; 4];
                    let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
                        MigratableError::MigrateReceive(anyhow!(
                            "Error receiving slot from socket: {}",
                            e
                        ))
                    })?;

                    if existing_memory_files.is_none() {
                        existing_memory_files = Some(HashMap::default())
                    }

                    if let Some(ref mut existing_memory_files) = existing_memory_files {
                        let slot = u32::from_le_bytes(buf);
                        existing_memory_files.insert(slot, file.unwrap());
                    }

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Complete => {
                    info!("Complete Command Received");
                    // All state has arrived: resume the received VM.
                    if let Some(ref mut vm) = self.vm.as_mut() {
                        vm.resume()?;
                        Response::ok().write_to(&mut socket)?;
                    } else {
                        warn!("VM not created yet");
                        Response::error().write_to(&mut socket)?;
                    }
                    break;
                }
                Command::Abandon => {
                    info!("Abandon Command Received");
                    // Drop any partially-received VM/config. The response is
                    // best-effort (`.ok()`) since the source may be gone.
                    self.vm = None;
                    self.vm_config = None;
                    Response::ok().write_to(&mut socket).ok();
                    break;
                }
            }
        }

        Ok(())
    }
2080 
2081     fn vm_send_migration(
2082         &mut self,
2083         send_data_migration: VmSendMigrationData,
2084     ) -> result::Result<(), MigratableError> {
2085         info!(
2086             "Sending migration: destination_url = {}, local = {}",
2087             send_data_migration.destination_url, send_data_migration.local
2088         );
2089 
2090         if !self
2091             .vm_config
2092             .as_ref()
2093             .unwrap()
2094             .lock()
2095             .unwrap()
2096             .backed_by_shared_memory()
2097             && send_data_migration.local
2098         {
2099             return Err(MigratableError::MigrateSend(anyhow!(
2100                 "Local migration requires shared memory or hugepages enabled"
2101             )));
2102         }
2103 
2104         if let Some(vm) = self.vm.as_mut() {
2105             Self::send_migration(
2106                 vm,
2107                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2108                 self.hypervisor.clone(),
2109                 send_data_migration,
2110             )
2111             .map_err(|migration_err| {
2112                 error!("Migration failed: {:?}", migration_err);
2113 
2114                 // Stop logging dirty pages
2115                 if let Err(e) = vm.stop_dirty_log() {
2116                     return e;
2117                 }
2118 
2119                 if vm.get_state().unwrap() == VmState::Paused {
2120                     if let Err(e) = vm.resume() {
2121                         return e;
2122                     }
2123                 }
2124 
2125                 migration_err
2126             })?;
2127 
2128             // Shutdown the VM after the migration succeeded
2129             self.exit_evt.write(1).map_err(|e| {
2130                 MigratableError::MigrateSend(anyhow!(
2131                     "Failed shutting down the VM after migration: {:?}",
2132                     e
2133                 ))
2134             })
2135         } else {
2136             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2137         }
2138     }
2139 }
2140 
/// Snapshot section identifier used for the CPU manager state.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
/// Snapshot section identifier used for the memory manager state.
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
/// Snapshot section identifier used for the device manager state.
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2144 
2145 #[cfg(test)]
2146 mod unit_tests {
2147     use super::*;
2148     #[cfg(target_arch = "x86_64")]
2149     use crate::vm_config::DebugConsoleConfig;
2150     use crate::vm_config::{
2151         ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig,
2152         PayloadConfig, RngConfig,
2153     };
2154 
2155     fn create_dummy_vmm() -> Vmm {
2156         Vmm::new(
2157             VmmVersionInfo::new("dummy", "dummy"),
2158             EventFd::new(EFD_NONBLOCK).unwrap(),
2159             #[cfg(feature = "guest_debug")]
2160             EventFd::new(EFD_NONBLOCK).unwrap(),
2161             #[cfg(feature = "guest_debug")]
2162             EventFd::new(EFD_NONBLOCK).unwrap(),
2163             SeccompAction::Allow,
2164             hypervisor::new().unwrap(),
2165             EventFd::new(EFD_NONBLOCK).unwrap(),
2166         )
2167         .unwrap()
2168     }
2169 
    /// Build a baseline `VmConfig` for the unit tests below: a single vCPU,
    /// 512 MiB of shared memory and a kernel payload path.
    fn create_dummy_vm_config() -> Box<VmConfig> {
        Box::new(VmConfig {
            cpus: CpusConfig {
                boot_vcpus: 1,
                max_vcpus: 1,
                topology: None,
                kvm_hyperv: false,
                max_phys_bits: 46,
                affinity: None,
                features: CpuFeatures::default(),
            },
            memory: MemoryConfig {
                // 512 MiB (536_870_912 bytes), shared backing.
                size: 536_870_912,
                mergeable: false,
                hotplug_method: HotplugMethod::Acpi,
                hotplug_size: None,
                hotplugged_size: None,
                shared: true,
                hugepages: false,
                hugepage_size: None,
                prefault: false,
                zones: None,
                thp: true,
            },
            // The kernel path is never opened by these tests; it only has to
            // satisfy config validation.
            payload: Some(PayloadConfig {
                kernel: Some(PathBuf::from("/path/to/kernel")),
                firmware: None,
                cmdline: None,
                initramfs: None,
                #[cfg(feature = "igvm")]
                igvm: None,
                #[cfg(feature = "sev_snp")]
                host_data: None,
            }),
            rate_limit_groups: None,
            disks: None,
            net: None,
            rng: RngConfig {
                src: PathBuf::from("/dev/urandom"),
                iommu: false,
            },
            balloon: None,
            fs: None,
            pmem: None,
            serial: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Null,
                iommu: false,
                socket: None,
            },
            console: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Tty,
                iommu: false,
                socket: None,
            },
            #[cfg(target_arch = "x86_64")]
            debug_console: DebugConsoleConfig::default(),
            devices: None,
            user_devices: None,
            vdpa: None,
            vsock: None,
            #[cfg(feature = "pvmemcontrol")]
            pvmemcontrol: None,
            pvpanic: false,
            iommu: false,
            #[cfg(target_arch = "x86_64")]
            sgx_epc: None,
            numa: None,
            watchdog: false,
            #[cfg(feature = "guest_debug")]
            gdb: false,
            pci_segments: None,
            platform: None,
            tpm: None,
            preserved_fds: None,
            landlock_enable: false,
            landlock_rules: None,
        })
    }
2250 
2251     #[test]
2252     fn test_vmm_vm_create() {
2253         let mut vmm = create_dummy_vmm();
2254         let config = create_dummy_vm_config();
2255 
2256         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2257         assert!(matches!(
2258             vmm.vm_create(config),
2259             Err(VmError::VmAlreadyCreated)
2260         ));
2261     }
2262 
2263     #[test]
2264     fn test_vmm_vm_cold_add_device() {
2265         let mut vmm = create_dummy_vmm();
2266         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2267 
2268         assert!(matches!(
2269             vmm.vm_add_device(device_config.clone()),
2270             Err(VmError::VmNotCreated)
2271         ));
2272 
2273         let _ = vmm.vm_create(create_dummy_vm_config());
2274         assert!(vmm
2275             .vm_config
2276             .as_ref()
2277             .unwrap()
2278             .lock()
2279             .unwrap()
2280             .devices
2281             .is_none());
2282 
2283         assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none());
2284         assert_eq!(
2285             vmm.vm_config
2286                 .as_ref()
2287                 .unwrap()
2288                 .lock()
2289                 .unwrap()
2290                 .devices
2291                 .clone()
2292                 .unwrap()
2293                 .len(),
2294             1
2295         );
2296         assert_eq!(
2297             vmm.vm_config
2298                 .as_ref()
2299                 .unwrap()
2300                 .lock()
2301                 .unwrap()
2302                 .devices
2303                 .clone()
2304                 .unwrap()[0],
2305             device_config
2306         );
2307     }
2308 
2309     #[test]
2310     fn test_vmm_vm_cold_add_user_device() {
2311         let mut vmm = create_dummy_vmm();
2312         let user_device_config =
2313             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2314 
2315         assert!(matches!(
2316             vmm.vm_add_user_device(user_device_config.clone()),
2317             Err(VmError::VmNotCreated)
2318         ));
2319 
2320         let _ = vmm.vm_create(create_dummy_vm_config());
2321         assert!(vmm
2322             .vm_config
2323             .as_ref()
2324             .unwrap()
2325             .lock()
2326             .unwrap()
2327             .user_devices
2328             .is_none());
2329 
2330         assert!(vmm
2331             .vm_add_user_device(user_device_config.clone())
2332             .unwrap()
2333             .is_none());
2334         assert_eq!(
2335             vmm.vm_config
2336                 .as_ref()
2337                 .unwrap()
2338                 .lock()
2339                 .unwrap()
2340                 .user_devices
2341                 .clone()
2342                 .unwrap()
2343                 .len(),
2344             1
2345         );
2346         assert_eq!(
2347             vmm.vm_config
2348                 .as_ref()
2349                 .unwrap()
2350                 .lock()
2351                 .unwrap()
2352                 .user_devices
2353                 .clone()
2354                 .unwrap()[0],
2355             user_device_config
2356         );
2357     }
2358 
2359     #[test]
2360     fn test_vmm_vm_cold_add_disk() {
2361         let mut vmm = create_dummy_vmm();
2362         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2363 
2364         assert!(matches!(
2365             vmm.vm_add_disk(disk_config.clone()),
2366             Err(VmError::VmNotCreated)
2367         ));
2368 
2369         let _ = vmm.vm_create(create_dummy_vm_config());
2370         assert!(vmm
2371             .vm_config
2372             .as_ref()
2373             .unwrap()
2374             .lock()
2375             .unwrap()
2376             .disks
2377             .is_none());
2378 
2379         assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none());
2380         assert_eq!(
2381             vmm.vm_config
2382                 .as_ref()
2383                 .unwrap()
2384                 .lock()
2385                 .unwrap()
2386                 .disks
2387                 .clone()
2388                 .unwrap()
2389                 .len(),
2390             1
2391         );
2392         assert_eq!(
2393             vmm.vm_config
2394                 .as_ref()
2395                 .unwrap()
2396                 .lock()
2397                 .unwrap()
2398                 .disks
2399                 .clone()
2400                 .unwrap()[0],
2401             disk_config
2402         );
2403     }
2404 
2405     #[test]
2406     fn test_vmm_vm_cold_add_fs() {
2407         let mut vmm = create_dummy_vmm();
2408         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2409 
2410         assert!(matches!(
2411             vmm.vm_add_fs(fs_config.clone()),
2412             Err(VmError::VmNotCreated)
2413         ));
2414 
2415         let _ = vmm.vm_create(create_dummy_vm_config());
2416         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2417 
2418         assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none());
2419         assert_eq!(
2420             vmm.vm_config
2421                 .as_ref()
2422                 .unwrap()
2423                 .lock()
2424                 .unwrap()
2425                 .fs
2426                 .clone()
2427                 .unwrap()
2428                 .len(),
2429             1
2430         );
2431         assert_eq!(
2432             vmm.vm_config
2433                 .as_ref()
2434                 .unwrap()
2435                 .lock()
2436                 .unwrap()
2437                 .fs
2438                 .clone()
2439                 .unwrap()[0],
2440             fs_config
2441         );
2442     }
2443 
2444     #[test]
2445     fn test_vmm_vm_cold_add_pmem() {
2446         let mut vmm = create_dummy_vmm();
2447         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2448 
2449         assert!(matches!(
2450             vmm.vm_add_pmem(pmem_config.clone()),
2451             Err(VmError::VmNotCreated)
2452         ));
2453 
2454         let _ = vmm.vm_create(create_dummy_vm_config());
2455         assert!(vmm
2456             .vm_config
2457             .as_ref()
2458             .unwrap()
2459             .lock()
2460             .unwrap()
2461             .pmem
2462             .is_none());
2463 
2464         assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none());
2465         assert_eq!(
2466             vmm.vm_config
2467                 .as_ref()
2468                 .unwrap()
2469                 .lock()
2470                 .unwrap()
2471                 .pmem
2472                 .clone()
2473                 .unwrap()
2474                 .len(),
2475             1
2476         );
2477         assert_eq!(
2478             vmm.vm_config
2479                 .as_ref()
2480                 .unwrap()
2481                 .lock()
2482                 .unwrap()
2483                 .pmem
2484                 .clone()
2485                 .unwrap()[0],
2486             pmem_config
2487         );
2488     }
2489 
2490     #[test]
2491     fn test_vmm_vm_cold_add_net() {
2492         let mut vmm = create_dummy_vmm();
2493         let net_config = NetConfig::parse(
2494             "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
2495         )
2496         .unwrap();
2497 
2498         assert!(matches!(
2499             vmm.vm_add_net(net_config.clone()),
2500             Err(VmError::VmNotCreated)
2501         ));
2502 
2503         let _ = vmm.vm_create(create_dummy_vm_config());
2504         assert!(vmm
2505             .vm_config
2506             .as_ref()
2507             .unwrap()
2508             .lock()
2509             .unwrap()
2510             .net
2511             .is_none());
2512 
2513         assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none());
2514         assert_eq!(
2515             vmm.vm_config
2516                 .as_ref()
2517                 .unwrap()
2518                 .lock()
2519                 .unwrap()
2520                 .net
2521                 .clone()
2522                 .unwrap()
2523                 .len(),
2524             1
2525         );
2526         assert_eq!(
2527             vmm.vm_config
2528                 .as_ref()
2529                 .unwrap()
2530                 .lock()
2531                 .unwrap()
2532                 .net
2533                 .clone()
2534                 .unwrap()[0],
2535             net_config
2536         );
2537     }
2538 
2539     #[test]
2540     fn test_vmm_vm_cold_add_vdpa() {
2541         let mut vmm = create_dummy_vmm();
2542         let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();
2543 
2544         assert!(matches!(
2545             vmm.vm_add_vdpa(vdpa_config.clone()),
2546             Err(VmError::VmNotCreated)
2547         ));
2548 
2549         let _ = vmm.vm_create(create_dummy_vm_config());
2550         assert!(vmm
2551             .vm_config
2552             .as_ref()
2553             .unwrap()
2554             .lock()
2555             .unwrap()
2556             .vdpa
2557             .is_none());
2558 
2559         assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none());
2560         assert_eq!(
2561             vmm.vm_config
2562                 .as_ref()
2563                 .unwrap()
2564                 .lock()
2565                 .unwrap()
2566                 .vdpa
2567                 .clone()
2568                 .unwrap()
2569                 .len(),
2570             1
2571         );
2572         assert_eq!(
2573             vmm.vm_config
2574                 .as_ref()
2575                 .unwrap()
2576                 .lock()
2577                 .unwrap()
2578                 .vdpa
2579                 .clone()
2580                 .unwrap()[0],
2581             vdpa_config
2582         );
2583     }
2584 
2585     #[test]
2586     fn test_vmm_vm_cold_add_vsock() {
2587         let mut vmm = create_dummy_vmm();
2588         let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();
2589 
2590         assert!(matches!(
2591             vmm.vm_add_vsock(vsock_config.clone()),
2592             Err(VmError::VmNotCreated)
2593         ));
2594 
2595         let _ = vmm.vm_create(create_dummy_vm_config());
2596         assert!(vmm
2597             .vm_config
2598             .as_ref()
2599             .unwrap()
2600             .lock()
2601             .unwrap()
2602             .vsock
2603             .is_none());
2604 
2605         assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none());
2606         assert_eq!(
2607             vmm.vm_config
2608                 .as_ref()
2609                 .unwrap()
2610                 .lock()
2611                 .unwrap()
2612                 .vsock
2613                 .clone()
2614                 .unwrap(),
2615             vsock_config
2616         );
2617     }
2618 }
2619