xref: /cloud-hypervisor/vmm/src/lib.rs (revision 68468b8519fe25ad68e6a1fffb86aa534c8f8a72)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use crate::api::{
12     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
13     VmSendMigrationData, VmmPingResponse,
14 };
15 use crate::config::{
16     add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig,
17     UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
18 };
19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
20 use crate::coredump::GuestDebuggable;
21 use crate::landlock::Landlock;
22 use crate::memory_manager::MemoryManager;
23 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
24 use crate::migration::get_vm_snapshot;
25 use crate::migration::{recv_vm_config, recv_vm_state};
26 use crate::seccomp_filters::{get_seccomp_filter, Thread};
27 use crate::vm::{Error as VmError, Vm, VmState};
28 use anyhow::anyhow;
29 #[cfg(feature = "dbus_api")]
30 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
31 use api::http::HttpApiHandle;
32 use console_devices::{pre_create_console_devices, ConsoleInfo};
33 use landlock::LandlockError;
34 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
35 use memory_manager::MemoryManagerSnapshotData;
36 use pci::PciBdf;
37 use seccompiler::{apply_filter, SeccompAction};
38 use serde::ser::{SerializeStruct, Serializer};
39 use serde::{Deserialize, Serialize};
40 use signal_hook::iterator::{Handle, Signals};
41 use std::collections::HashMap;
42 use std::fs::File;
43 use std::io;
44 use std::io::{stdout, Read, Write};
45 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
46 use std::os::unix::net::UnixListener;
47 use std::os::unix::net::UnixStream;
48 use std::panic::AssertUnwindSafe;
49 use std::path::PathBuf;
50 use std::rc::Rc;
51 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
52 use std::sync::{Arc, Mutex};
53 use std::time::Instant;
54 use std::{result, thread};
55 use thiserror::Error;
56 use tracer::trace_scoped;
57 use vm_memory::bitmap::AtomicBitmap;
58 use vm_memory::{ReadVolatile, WriteVolatile};
59 use vm_migration::{protocol::*, Migratable};
60 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
61 use vmm_sys_util::eventfd::EventFd;
62 use vmm_sys_util::signal::unblock_signal;
63 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
64 
65 mod acpi;
66 pub mod api;
67 mod clone3;
68 pub mod config;
69 pub mod console_devices;
70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
71 mod coredump;
72 pub mod cpu;
73 pub mod device_manager;
74 pub mod device_tree;
75 #[cfg(feature = "guest_debug")]
76 mod gdb;
77 #[cfg(feature = "igvm")]
78 mod igvm;
79 pub mod interrupt;
80 pub mod landlock;
81 pub mod memory_manager;
82 pub mod migration;
83 mod pci_segment;
84 pub mod seccomp_filters;
85 mod serial_manager;
86 mod sigwinch_listener;
87 pub mod vm;
88 pub mod vm_config;
89 
/// Guest memory map specialized with `AtomicBitmap`, the per-page bitmap
/// used by this crate's dirty-page tracking (see the migration code).
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
/// Single guest memory region, carrying the same dirty-tracking bitmap.
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
92 
93 /// Errors associated with VMM management
94 #[derive(Debug, Error)]
95 pub enum Error {
96     /// API request receive error
97     #[error("Error receiving API request: {0}")]
98     ApiRequestRecv(#[source] RecvError),
99 
100     /// API response send error
101     #[error("Error sending API request: {0}")]
102     ApiResponseSend(#[source] SendError<ApiResponse>),
103 
104     /// Cannot bind to the UNIX domain socket path
105     #[error("Error binding to UNIX domain socket: {0}")]
106     Bind(#[source] io::Error),
107 
108     /// Cannot clone EventFd.
109     #[error("Error cloning EventFd: {0}")]
110     EventFdClone(#[source] io::Error),
111 
112     /// Cannot create EventFd.
113     #[error("Error creating EventFd: {0}")]
114     EventFdCreate(#[source] io::Error),
115 
116     /// Cannot read from EventFd.
117     #[error("Error reading from EventFd: {0}")]
118     EventFdRead(#[source] io::Error),
119 
120     /// Cannot create epoll context.
121     #[error("Error creating epoll context: {0}")]
122     Epoll(#[source] io::Error),
123 
124     /// Cannot create HTTP thread
125     #[error("Error spawning HTTP thread: {0}")]
126     HttpThreadSpawn(#[source] io::Error),
127 
128     /// Cannot create D-Bus thread
129     #[cfg(feature = "dbus_api")]
130     #[error("Error spawning D-Bus thread: {0}")]
131     DBusThreadSpawn(#[source] io::Error),
132 
133     /// Cannot start D-Bus session
134     #[cfg(feature = "dbus_api")]
135     #[error("Error starting D-Bus session: {0}")]
136     CreateDBusSession(#[source] zbus::Error),
137 
138     /// Cannot create `event-monitor` thread
139     #[error("Error spawning `event-monitor` thread: {0}")]
140     EventMonitorThreadSpawn(#[source] io::Error),
141 
142     /// Cannot handle the VM STDIN stream
143     #[error("Error handling VM stdin: {0:?}")]
144     Stdin(VmError),
145 
146     /// Cannot handle the VM pty stream
147     #[error("Error handling VM pty: {0:?}")]
148     Pty(VmError),
149 
150     /// Cannot reboot the VM
151     #[error("Error rebooting VM: {0:?}")]
152     VmReboot(VmError),
153 
154     /// Cannot create VMM thread
155     #[error("Error spawning VMM thread {0:?}")]
156     VmmThreadSpawn(#[source] io::Error),
157 
158     /// Cannot shut the VMM down
159     #[error("Error shutting down VMM: {0:?}")]
160     VmmShutdown(VmError),
161 
162     /// Cannot create seccomp filter
163     #[error("Error creating seccomp filter: {0}")]
164     CreateSeccompFilter(seccompiler::Error),
165 
166     /// Cannot apply seccomp filter
167     #[error("Error applying seccomp filter: {0}")]
168     ApplySeccompFilter(seccompiler::Error),
169 
170     /// Error activating virtio devices
171     #[error("Error activating virtio devices: {0:?}")]
172     ActivateVirtioDevices(VmError),
173 
174     /// Error creating API server
175     #[error("Error creating API server {0:?}")]
176     CreateApiServer(micro_http::ServerError),
177 
178     /// Error binding API server socket
179     #[error("Error creation API server's socket {0:?}")]
180     CreateApiServerSocket(#[source] io::Error),
181 
182     #[cfg(feature = "guest_debug")]
183     #[error("Failed to start the GDB thread: {0}")]
184     GdbThreadSpawn(io::Error),
185 
186     /// GDB request receive error
187     #[cfg(feature = "guest_debug")]
188     #[error("Error receiving GDB request: {0}")]
189     GdbRequestRecv(#[source] RecvError),
190 
191     /// GDB response send error
192     #[cfg(feature = "guest_debug")]
193     #[error("Error sending GDB request: {0}")]
194     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
195 
196     #[error("Cannot spawn a signal handler thread: {0}")]
197     SignalHandlerSpawn(#[source] io::Error),
198 
199     #[error("Failed to join on threads: {0:?}")]
200     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
201 
202     /// Cannot create Landlock object
203     #[error("Error creating landlock object: {0}")]
204     CreateLandlock(LandlockError),
205 
206     /// Cannot apply landlock based sandboxing
207     #[error("Error applying landlock: {0}")]
208     ApplyLandlock(LandlockError),
209 }
210 pub type Result<T> = result::Result<T, Error>;
211 
/// Event sources the VMM control loop dispatches on. Each variant's
/// discriminant is the `u64` token registered with epoll for the
/// corresponding event fd (see `Vmm::new`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    /// `exit_evt` fired: shut the VMM down.
    Exit = 0,
    /// `reset_evt` fired: reset the VM.
    Reset = 1,
    /// `api_evt` fired: an API request is pending.
    Api = 2,
    /// `activate_evt` fired: virtio devices need activation.
    ActivateVirtioDevices = 3,
    /// `debug_evt` fired (guest_debug builds).
    Debug = 4,
    /// Catch-all for token values outside the known range
    /// (see the `From<u64>` impl below).
    Unknown,
}
222 
223 impl From<u64> for EpollDispatch {
224     fn from(v: u64) -> Self {
225         use EpollDispatch::*;
226         match v {
227             0 => Exit,
228             1 => Reset,
229             2 => Api,
230             3 => ActivateVirtioDevices,
231             4 => Debug,
232             _ => Unknown,
233         }
234     }
235 }
236 
/// Thin owner of an epoll instance used by the VMM control loop.
pub struct EpollContext {
    /// Holding the fd as a `File` guarantees it is closed when this
    /// context is dropped.
    epoll_file: File,
}
240 
241 impl EpollContext {
242     pub fn new() -> result::Result<EpollContext, io::Error> {
243         let epoll_fd = epoll::create(true)?;
244         // Use 'File' to enforce closing on 'epoll_fd'
245         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
246         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
247 
248         Ok(EpollContext { epoll_file })
249     }
250 
251     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
252     where
253         T: AsRawFd,
254     {
255         let dispatch_index = token as u64;
256         epoll::ctl(
257             self.epoll_file.as_raw_fd(),
258             epoll::ControlOptions::EPOLL_CTL_ADD,
259             fd.as_raw_fd(),
260             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
261         )?;
262 
263         Ok(())
264     }
265 
266     #[cfg(fuzzing)]
267     pub fn add_event_custom<T>(
268         &mut self,
269         fd: &T,
270         id: u64,
271         evts: epoll::Events,
272     ) -> result::Result<(), io::Error>
273     where
274         T: AsRawFd,
275     {
276         epoll::ctl(
277             self.epoll_file.as_raw_fd(),
278             epoll::ControlOptions::EPOLL_CTL_ADD,
279             fd.as_raw_fd(),
280             epoll::Event::new(evts, id),
281         )?;
282 
283         Ok(())
284     }
285 }
286 
impl AsRawFd for EpollContext {
    /// Exposes the underlying epoll fd so callers can wait on it directly.
    fn as_raw_fd(&self) -> RawFd {
        self.epoll_file.as_raw_fd()
    }
}
292 
/// Serializable description of a PCI device: its identifier and its
/// bus/device/function address.
pub struct PciDeviceInfo {
    /// Device identifier.
    pub id: String,
    /// PCI bus/device/function address.
    pub bdf: PciBdf,
}
297 
impl Serialize for PciDeviceInfo {
    /// Hand-written serialization so that `bdf` is emitted as its string
    /// form (via `PciBdf`'s `to_string`) instead of a nested structure.
    /// Field names and order are part of the wire format.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bdf_str = self.bdf.to_string();

        // Serialize the structure.
        let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
        state.serialize_field("id", &self.id)?;
        state.serialize_field("bdf", &bdf_str)?;
        state.end()
    }
}
312 
/// Returns the names of the Cargo features this binary was built with.
///
/// Each entry is included only when the corresponding feature flag was
/// enabled at compile time, so the list is empty on a featureless build.
pub fn feature_list() -> Vec<String> {
    let mut features = Vec::new();

    #[cfg(feature = "dbus_api")]
    features.push("dbus_api".to_string());
    #[cfg(feature = "dhat-heap")]
    features.push("dhat-heap".to_string());
    #[cfg(feature = "guest_debug")]
    features.push("guest_debug".to_string());
    #[cfg(feature = "igvm")]
    features.push("igvm".to_string());
    #[cfg(feature = "io_uring")]
    features.push("io_uring".to_string());
    #[cfg(feature = "kvm")]
    features.push("kvm".to_string());
    #[cfg(feature = "mshv")]
    features.push("mshv".to_string());
    #[cfg(feature = "sev_snp")]
    features.push("sev_snp".to_string());
    #[cfg(feature = "tdx")]
    features.push("tdx".to_string());
    #[cfg(feature = "tracing")]
    features.push("tracing".to_string());

    features
}
337 
/// Spawns the `event-monitor` thread, which drains events from `monitor.rx`
/// and fans each one out to the optional event file and to every broadcast
/// subscriber.
///
/// Before entering its loop the thread applies the event-monitor seccomp
/// filter and, when `landlock_enable` is set, a Landlock sandbox. Seccomp or
/// Landlock application failures (and panics inside the loop) signal
/// `exit_event` so the rest of the VMM can shut down.
///
/// Returns the join handle of the spawned thread.
pub fn start_event_monitor_thread(
    mut monitor: event_monitor::Monitor,
    seccomp_action: &SeccompAction,
    landlock_enable: bool,
    hypervisor_type: hypervisor::HypervisorType,
    exit_event: EventFd,
) -> Result<thread::JoinHandle<Result<()>>> {
    // Retrieve seccomp filter
    let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    thread::Builder::new()
        .name("event-monitor".to_owned())
        .spawn(move || {
            // Apply seccomp filter
            if !seccomp_filter.is_empty() {
                apply_filter(&seccomp_filter)
                    .map_err(Error::ApplySeccompFilter)
                    .map_err(|e| {
                        error!("Error applying seccomp filter: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }
            if landlock_enable {
                // NOTE(review): unlike the other failure paths in this
                // closure, a CreateLandlock error propagates without
                // signaling exit_event — confirm this is intended.
                Landlock::new()
                    .map_err(Error::CreateLandlock)?
                    .restrict_self()
                    .map_err(Error::ApplyLandlock)
                    .map_err(|e| {
                        error!("Error applying landlock to event monitor thread: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }

            // catch_unwind converts a panic in the receive loop into an
            // exit_event notification instead of a silent thread death.
            std::panic::catch_unwind(AssertUnwindSafe(move || {
                while let Ok(event) = monitor.rx.recv() {
                    let event = Arc::new(event);

                    // Persist the event to the monitor file, if configured;
                    // events are separated by a blank line.
                    if let Some(ref mut file) = monitor.file {
                        file.write_all(event.as_bytes().as_ref()).ok();
                        file.write_all(b"\n\n").ok();
                    }

                    // Forward the event to every broadcast subscriber;
                    // send failures are deliberately ignored (best effort).
                    for tx in monitor.broadcast.iter() {
                        tx.send(event.clone()).ok();
                    }
                }
            }))
            .map_err(|_| {
                error!("`event-monitor` thread panicked");
                exit_event.write(1).ok();
            })
            .ok();

            Ok(())
        })
        .map_err(Error::EventMonitorThreadSpawn)
}
398 
/// Creates the VMM worker thread and the API transports that feed it.
///
/// Concretely this:
/// 1. spawns the "vmm" thread, which applies its seccomp filter, builds a
///    [`Vmm`], installs the signal handler and runs the control loop;
/// 2. optionally starts the D-Bus API thread (`dbus_api` feature);
/// 3. starts the HTTP API thread on either a UNIX socket path or an
///    inherited fd, when one was supplied;
/// 4. optionally starts the GDB stub thread (`guest_debug` feature).
///
/// Returns a [`VmmThreadHandle`] holding the VMM thread's join handle plus
/// the handles needed to shut the API transports down.
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    landlock_enable: bool,
) -> Result<VmmThreadHandle> {
    // Clone everything the GDB stub thread will need before the originals
    // are moved into the VMM thread's closure.
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler(landlock_enable)?;

                // Runs until shutdown; its result becomes the thread's
                // return value.
                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started, we can start the dbus thread
    // and start serving HTTP requests
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    // A socket path takes precedence over an inherited fd; with neither,
    // no HTTP server is started.
    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else {
        None
    };

    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        // The GDB thread's join handle is intentionally dropped; it lives
        // for the rest of the process.
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}
530 
/// Payload exchanged during live migration: the source VM's configuration,
/// the memory-manager state needed to rebuild guest memory on the
/// destination, and (on x86_64/KVM) the CPUID entries checked for
/// compatibility in `vm_receive_config`.
#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    vm_config: Arc<Mutex<VmConfig>>,
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    memory_manager_data: MemoryManagerSnapshotData,
}
538 
/// Build and release version strings describing this VMM binary.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    /// Full build identifier string.
    pub build_version: String,
    /// Release version string.
    pub version: String,
}
544 
545 impl VmmVersionInfo {
546     pub fn new(build_version: &str, version: &str) -> Self {
547         Self {
548             build_version: build_version.to_owned(),
549             version: version.to_owned(),
550         }
551     }
552 }
553 
/// Handles returned by [`start_vmm_thread`]: the VMM thread itself plus the
/// handles of the API transports started alongside it.
pub struct VmmThreadHandle {
    /// Join handle of the "vmm" control-loop thread.
    pub thread_handle: thread::JoinHandle<Result<()>>,
    /// Shutdown channels for the D-Bus API thread, if one was started.
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    /// Handle to the HTTP API server thread, if one was started.
    pub http_api_handle: Option<HttpApiHandle>,
}
560 
/// State of the VMM management thread: the event fds its control loop waits
/// on, the currently managed VM (if any), and resources shared with helper
/// threads.
pub struct Vmm {
    /// Epoll context the control loop polls.
    epoll: EpollContext,
    /// Written to request VMM shutdown (also signaled by the signal
    /// handler thread on SIGTERM/SIGINT).
    exit_evt: EventFd,
    /// Written to trigger a VM reset (dispatched as `EpollDispatch::Reset`).
    reset_evt: EventFd,
    /// Written when an API request is pending.
    api_evt: EventFd,
    /// VMM-level debug event (guest_debug builds).
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    /// VM-level debug event, handed to the VM on creation/restore.
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    /// Version info reported by this VMM.
    version: VmmVersionInfo,
    /// The VM currently managed by this VMM, if one exists.
    vm: Option<Vm>,
    /// Configuration of the current (or to-be-created) VM.
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    /// Written when virtio devices need activation.
    activate_evt: EventFd,
    /// Handle used to close the signal-handler thread's `Signals` iterator.
    signals: Option<Handle>,
    /// Join handles of spawned helper threads (e.g. the signal handler).
    threads: Vec<thread::JoinHandle<()>>,
    /// Saved terminal settings; restored by the signal handler before a
    /// forced exit when present.
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    /// Optional pipe handed to the VM for console resize handling.
    console_resize_pipe: Option<Arc<File>>,
    /// Console device info produced by `pre_create_console_devices`.
    console_info: Option<ConsoleInfo>,
}
582 
583 impl Vmm {
    /// Signals intercepted by the VMM in order to trigger a clean shutdown.
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
585 
    /// Body of the `vmm_signal_handler` thread.
    ///
    /// Unblocks the handled signals for this thread, then waits for them
    /// forever. On SIGTERM/SIGINT it notifies `exit_evt`; if that write
    /// fails, a clean shutdown is impossible, so the saved terminal
    /// settings (if any) are restored and the process exits with status 1.
    fn signal_handler(
        mut signals: Signals,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
        exit_evt: &EventFd,
    ) {
        // Make sure this thread actually receives the handled signals
        // (they may be blocked).
        for sig in &Self::HANDLED_SIGNALS {
            unblock_signal(*sig).unwrap();
        }

        for signal in signals.forever() {
            match signal {
                SIGTERM | SIGINT => {
                    if exit_evt.write(1).is_err() {
                        // Resetting the terminal is usually done as the VMM exits
                        if let Ok(lock) = original_termios_opt.lock() {
                            if let Some(termios) = *lock {
                                // SAFETY: FFI call
                                let _ = unsafe {
                                    tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
                                };
                            }
                        } else {
                            warn!("Failed to lock original termios");
                        }

                        std::process::exit(1);
                    }
                }
                _ => (),
            }
        }
    }
618 
619     fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
620         let signals = Signals::new(Self::HANDLED_SIGNALS);
621         match signals {
622             Ok(signals) => {
623                 self.signals = Some(signals.handle());
624                 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
625                 let original_termios_opt = Arc::clone(&self.original_termios_opt);
626 
627                 let signal_handler_seccomp_filter = get_seccomp_filter(
628                     &self.seccomp_action,
629                     Thread::SignalHandler,
630                     self.hypervisor.hypervisor_type(),
631                 )
632                 .map_err(Error::CreateSeccompFilter)?;
633                 self.threads.push(
634                     thread::Builder::new()
635                         .name("vmm_signal_handler".to_string())
636                         .spawn(move || {
637                             if !signal_handler_seccomp_filter.is_empty() {
638                                 if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
639                                     .map_err(Error::ApplySeccompFilter)
640                                 {
641                                     error!("Error applying seccomp filter: {:?}", e);
642                                     exit_evt.write(1).ok();
643                                     return;
644                                 }
645                             }
646                             if landlock_enable{
647                                 match Landlock::new() {
648                                     Ok(landlock) => {
649                                         let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
650                                             error!("Error applying Landlock to signal handler thread: {:?}", e);
651                                             exit_evt.write(1).ok();
652                                         });
653                                     }
654                                     Err(e) => {
655                                         error!("Error creating Landlock object: {:?}", e);
656                                         exit_evt.write(1).ok();
657                                     }
658                                 };
659                             }
660 
661                             std::panic::catch_unwind(AssertUnwindSafe(|| {
662                                 Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
663                             }))
664                             .map_err(|_| {
665                                 error!("vmm signal_handler thread panicked");
666                                 exit_evt.write(1).ok()
667                             })
668                             .ok();
669                         })
670                         .map_err(Error::SignalHandlerSpawn)?,
671                 );
672             }
673             Err(e) => error!("Signal not found {}", e),
674         }
675         Ok(())
676     }
677 
    /// Builds a `Vmm`: creates the reset and activate event fds and
    /// registers every control event fd with the epoll context so
    /// `control_loop` can dispatch on them.
    ///
    /// `exit_evt` and `api_evt` are supplied by the caller; the VM and its
    /// configuration start out unset.
    #[allow(clippy::too_many_arguments)]
    fn new(
        vmm_version: VmmVersionInfo,
        api_evt: EventFd,
        #[cfg(feature = "guest_debug")] debug_evt: EventFd,
        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
        seccomp_action: SeccompAction,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        exit_evt: EventFd,
    ) -> Result<Self> {
        let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
        let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
        let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;

        // Register each control event fd under its dispatch token.
        epoll
            .add_event(&exit_evt, EpollDispatch::Exit)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&reset_evt, EpollDispatch::Reset)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&api_evt, EpollDispatch::Api)
            .map_err(Error::Epoll)?;

        #[cfg(feature = "guest_debug")]
        epoll
            .add_event(&debug_evt, EpollDispatch::Debug)
            .map_err(Error::Epoll)?;

        Ok(Vmm {
            epoll,
            exit_evt,
            reset_evt,
            api_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            #[cfg(feature = "guest_debug")]
            vm_debug_evt,
            version: vmm_version,
            vm: None,
            vm_config: None,
            seccomp_action,
            hypervisor,
            activate_evt,
            signals: None,
            threads: vec![],
            original_termios_opt: Arc::new(Mutex::new(None)),
            console_resize_pipe: None,
            console_info: None,
        })
    }
735 
    /// Handles the migration config request on the destination side: reads
    /// `req.length()` bytes of serialized `VmMigrationConfig` from `socket`,
    /// stores the VM configuration, creates the console devices, and
    /// rebuilds a `MemoryManager` from the transmitted snapshot data
    /// (optionally backed by `existing_memory_files` supplied by the
    /// caller).
    ///
    /// On success an OK response is written back to `socket` and the new
    /// memory manager is returned for the subsequent memory/state phases.
    fn vm_receive_config<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        existing_memory_files: Option<HashMap<u32, File>>,
    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
    where
        T: Read + Write,
    {
        // Read in config data along with memory manager data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;

        let vm_migration_config: VmMigrationConfig =
            serde_json::from_slice(&data).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
            })?;

        // Refuse the migration if this host cannot provide the source's
        // CPUID features.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(
            &vm_migration_config.vm_config,
            &vm_migration_config.common_cpuid,
        )?;

        let config = vm_migration_config.vm_config.clone();
        self.vm_config = Some(vm_migration_config.vm_config);
        self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
        })?);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
            })?;
        }

        let vm = Vm::create_hypervisor_vm(
            &self.hypervisor,
            #[cfg(feature = "tdx")]
            false,
            #[cfg(feature = "sev_snp")]
            false,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating hypervisor VM from snapshot: {:?}",
                e
            ))
        })?;

        let phys_bits =
            vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);

        // Rebuild guest memory from the transmitted memory-manager snapshot.
        let memory_manager = MemoryManager::new(
            vm,
            &config.lock().unwrap().memory.clone(),
            None,
            phys_bits,
            #[cfg(feature = "tdx")]
            false,
            Some(&vm_migration_config.memory_manager_data),
            existing_memory_files,
            #[cfg(target_arch = "x86_64")]
            None,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating MemoryManager from snapshot: {:?}",
                e
            ))
        })?;

        // Tell the source this phase succeeded.
        Response::ok().write_to(socket)?;

        Ok(memory_manager)
    }
822 
    /// Handles the migration state request on the destination side: reads
    /// `req.length()` bytes of serialized [`Snapshot`] from `socket`,
    /// creates the VM on top of the memory manager produced by
    /// [`Vmm::vm_receive_config`], restores it from the snapshot and stores
    /// it as the managed VM.
    ///
    /// On success an OK response is written to `socket`; if the restore
    /// fails an error response is sent before the error propagates.
    fn vm_receive_state<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        mm: Arc<Mutex<MemoryManager>>,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + Write,
    {
        // Read in state data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;
        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
        })?;

        // Clone the event fds the new VM needs from the VMM's own set.
        let exit_evt = self.exit_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
        })?;
        let reset_evt = self.reset_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
        })?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
        })?;
        let activate_evt = self.activate_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
        })?;

        let timestamp = Instant::now();
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let mut vm = Vm::new_from_memory_manager(
            self.vm_config.clone().unwrap(),
            mm,
            hypervisor_vm,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            timestamp,
            self.console_info.clone(),
            self.console_resize_pipe.as_ref().map(Arc::clone),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
        })?;

        // Create VM
        vm.restore().map_err(|e| {
            // Best-effort: tell the source the restore failed.
            Response::error().write_to(socket).ok();
            MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
        })?;
        self.vm = Some(vm);

        Response::ok().write_to(socket)?;

        Ok(())
    }
890 
891     fn vm_receive_memory<T>(
892         &mut self,
893         req: &Request,
894         socket: &mut T,
895         memory_manager: &mut MemoryManager,
896     ) -> std::result::Result<(), MigratableError>
897     where
898         T: Read + ReadVolatile + Write,
899     {
900         // Read table
901         let table = MemoryRangeTable::read_from(socket, req.length())?;
902 
903         // And then read the memory itself
904         memory_manager
905             .receive_memory_regions(&table, socket)
906             .inspect_err(|_| {
907                 Response::error().write_to(socket).ok();
908             })?;
909         Response::ok().write_to(socket)?;
910         Ok(())
911     }
912 
913     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
914         url.strip_prefix("unix:")
915             .ok_or_else(|| {
916                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
917             })
918             .map(|s| s.into())
919     }
920 
921     // Returns true if there were dirty pages to send
922     fn vm_maybe_send_dirty_pages<T>(
923         vm: &mut Vm,
924         socket: &mut T,
925     ) -> result::Result<bool, MigratableError>
926     where
927         T: Read + Write + WriteVolatile,
928     {
929         // Send (dirty) memory table
930         let table = vm.dirty_log()?;
931 
932         // But if there are no regions go straight to pause
933         if table.regions().is_empty() {
934             return Ok(false);
935         }
936 
937         Request::memory(table.length()).write_to(socket).unwrap();
938         table.write_to(socket)?;
939         // And then the memory itself
940         vm.send_memory_regions(&table, socket)?;
941         Response::read_from(socket)?.ok_or_abandon(
942             socket,
943             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
944         )?;
945 
946         Ok(true)
947     }
948 
949     fn send_migration(
950         vm: &mut Vm,
951         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
952             dyn hypervisor::Hypervisor,
953         >,
954         send_data_migration: VmSendMigrationData,
955     ) -> result::Result<(), MigratableError> {
956         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
957         let mut socket = UnixStream::connect(path).map_err(|e| {
958             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
959         })?;
960 
961         // Start the migration
962         Request::start().write_to(&mut socket)?;
963         Response::read_from(&mut socket)?.ok_or_abandon(
964             &mut socket,
965             MigratableError::MigrateSend(anyhow!("Error starting migration")),
966         )?;
967 
968         // Send config
969         let vm_config = vm.get_config();
970         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
971         let common_cpuid = {
972             #[cfg(feature = "tdx")]
973             if vm_config.lock().unwrap().is_tdx_enabled() {
974                 return Err(MigratableError::MigrateSend(anyhow!(
975                     "Live Migration is not supported when TDX is enabled"
976                 )));
977             };
978 
979             let amx = vm_config.lock().unwrap().cpus.features.amx;
980             let phys_bits =
981                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
982             arch::generate_common_cpuid(
983                 &hypervisor,
984                 &arch::CpuidConfig {
985                     sgx_epc_sections: None,
986                     phys_bits,
987                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
988                     #[cfg(feature = "tdx")]
989                     tdx: false,
990                     amx,
991                 },
992             )
993             .map_err(|e| {
994                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
995             })?
996         };
997 
998         if send_data_migration.local {
999             vm.send_memory_fds(&mut socket)?;
1000         }
1001 
1002         let vm_migration_config = VmMigrationConfig {
1003             vm_config,
1004             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1005             common_cpuid,
1006             memory_manager_data: vm.memory_manager_data(),
1007         };
1008         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1009         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1010         socket
1011             .write_all(&config_data)
1012             .map_err(MigratableError::MigrateSocket)?;
1013         Response::read_from(&mut socket)?.ok_or_abandon(
1014             &mut socket,
1015             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1016         )?;
1017 
1018         // Let every Migratable object know about the migration being started.
1019         vm.start_migration()?;
1020 
1021         if send_data_migration.local {
1022             // Now pause VM
1023             vm.pause()?;
1024         } else {
1025             // Start logging dirty pages
1026             vm.start_dirty_log()?;
1027 
1028             // Send memory table
1029             let table = vm.memory_range_table()?;
1030             Request::memory(table.length())
1031                 .write_to(&mut socket)
1032                 .unwrap();
1033             table.write_to(&mut socket)?;
1034             // And then the memory itself
1035             vm.send_memory_regions(&table, &mut socket)?;
1036             Response::read_from(&mut socket)?.ok_or_abandon(
1037                 &mut socket,
1038                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1039             )?;
1040 
1041             // Try at most 5 passes of dirty memory sending
1042             const MAX_DIRTY_MIGRATIONS: usize = 5;
1043             for i in 0..MAX_DIRTY_MIGRATIONS {
1044                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1045                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1046                     break;
1047                 }
1048             }
1049 
1050             // Now pause VM
1051             vm.pause()?;
1052 
1053             // Send last batch of dirty pages
1054             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1055 
1056             // Stop logging dirty pages
1057             vm.stop_dirty_log()?;
1058         }
1059         // Capture snapshot and send it
1060         let vm_snapshot = vm.snapshot()?;
1061         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1062         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1063         socket
1064             .write_all(&snapshot_data)
1065             .map_err(MigratableError::MigrateSocket)?;
1066         Response::read_from(&mut socket)?.ok_or_abandon(
1067             &mut socket,
1068             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1069         )?;
1070         // Complete the migration
1071         Request::complete().write_to(&mut socket)?;
1072         Response::read_from(&mut socket)?.ok_or_abandon(
1073             &mut socket,
1074             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1075         )?;
1076 
1077         info!("Migration complete");
1078 
1079         // Let every Migratable object know about the migration being complete
1080         vm.complete_migration()
1081     }
1082 
1083     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1084     fn vm_check_cpuid_compatibility(
1085         &self,
1086         src_vm_config: &Arc<Mutex<VmConfig>>,
1087         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1088     ) -> result::Result<(), MigratableError> {
1089         #[cfg(feature = "tdx")]
1090         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1091             return Err(MigratableError::MigrateReceive(anyhow!(
1092                 "Live Migration is not supported when TDX is enabled"
1093             )));
1094         };
1095 
1096         // We check the `CPUID` compatibility of between the source vm and destination, which is
1097         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1098         let dest_cpuid = &{
1099             let vm_config = &src_vm_config.lock().unwrap();
1100 
1101             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1102             arch::generate_common_cpuid(
1103                 &self.hypervisor.clone(),
1104                 &arch::CpuidConfig {
1105                     sgx_epc_sections: None,
1106                     phys_bits,
1107                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1108                     #[cfg(feature = "tdx")]
1109                     tdx: false,
1110                     amx: vm_config.cpus.features.amx,
1111                 },
1112             )
1113             .map_err(|e| {
1114                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1115             })?
1116         };
1117         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1118             MigratableError::MigrateReceive(anyhow!(
1119                 "Error checking cpu feature compatibility': {:?}",
1120                 e
1121             ))
1122         })
1123     }
1124 
    /// Main VMM event loop.
    ///
    /// Blocks on epoll and dispatches events (VM exit, VM reset, pending
    /// virtio-device activation, API requests and — with `guest_debug` —
    /// GDB requests) until either a VM exit event arrives or an API request
    /// handler returns `true` (asking the VMM to stop). Before returning,
    /// the signal-handler thread's `Signals` iterator is closed and all
    /// VMM-owned threads are joined.
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        // Maximum number of events handled per epoll_wait() wake-up.
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        if let Some(ref vm) = self.vm {
                            // The eventfd counter tells how many activation
                            // requests were batched since the last wake-up.
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            // A request handler returning `true` asks the
                            // VMM to exit the control loop.
                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
1234 }
1235 
1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
1237     vm_config.lock().unwrap().apply_landlock()?;
1238     Ok(())
1239 }
1240 
1241 impl RequestHandler for Vmm {
1242     fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> {
1243         // We only store the passed VM config.
1244         // The VM will be created when being asked to boot it.
1245         if self.vm_config.is_none() {
1246             self.vm_config = Some(config);
1247             self.console_info =
1248                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1249 
1250             if self
1251                 .vm_config
1252                 .as_ref()
1253                 .unwrap()
1254                 .lock()
1255                 .unwrap()
1256                 .landlock_enable
1257             {
1258                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1259                     .map_err(VmError::ApplyLandlock)?;
1260             }
1261             Ok(())
1262         } else {
1263             Err(VmError::VmAlreadyCreated)
1264         }
1265     }
1266 
    /// Boots the VM, first creating it from the stored config when needed.
    ///
    /// Errors with `VmMissingConfig` if no config was created beforehand and
    /// `VmNotCreated` if no VM instance exists after the creation attempt.
    /// Emits the "booting"/"booted" events and brackets the work with
    /// `tracer::start()`/`tracer::end()`.
    fn vm_boot(&mut self) -> result::Result<(), VmError> {
        tracer::start();
        info!("Booting VM");
        event!("vm", "booting");
        let r = {
            trace_scoped!("vm_boot");
            // If we don't have a config, we cannot boot a VM.
            if self.vm_config.is_none() {
                return Err(VmError::VmMissingConfig);
            };

            // console_info is set to None in vm_shutdown. re-populate here if empty
            if self.console_info.is_none() {
                self.console_info =
                    Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
            }

            // Create a new VM if we don't have one yet.
            if self.vm.is_none() {
                let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
                let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
                #[cfg(feature = "guest_debug")]
                let vm_debug_evt = self
                    .vm_debug_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;
                let activate_evt = self
                    .activate_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;

                if let Some(ref vm_config) = self.vm_config {
                    // The trailing `None`s occupy the snapshot/source-url/
                    // prefault slots used by vm_restore — a fresh boot has
                    // no snapshot to restore from.
                    let vm = Vm::new(
                        Arc::clone(vm_config),
                        exit_evt,
                        reset_evt,
                        #[cfg(feature = "guest_debug")]
                        vm_debug_evt,
                        &self.seccomp_action,
                        self.hypervisor.clone(),
                        activate_evt,
                        self.console_info.clone(),
                        self.console_resize_pipe.as_ref().map(Arc::clone),
                        Arc::clone(&self.original_termios_opt),
                        None,
                        None,
                        None,
                    )?;

                    self.vm = Some(vm);
                }
            }

            // Now we can boot the VM.
            if let Some(ref mut vm) = self.vm {
                vm.boot()
            } else {
                Err(VmError::VmNotCreated)
            }
        };
        tracer::end();
        if r.is_ok() {
            event!("vm", "booted");
        }
        r
    }
1333 
1334     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1335         if let Some(ref mut vm) = self.vm {
1336             vm.pause().map_err(VmError::Pause)
1337         } else {
1338             Err(VmError::VmNotRunning)
1339         }
1340     }
1341 
1342     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1343         if let Some(ref mut vm) = self.vm {
1344             vm.resume().map_err(VmError::Resume)
1345         } else {
1346             Err(VmError::VmNotRunning)
1347         }
1348     }
1349 
1350     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1351         if let Some(ref mut vm) = self.vm {
1352             // Drain console_info so that FDs are not reused
1353             let _ = self.console_info.take();
1354             vm.snapshot()
1355                 .map_err(VmError::Snapshot)
1356                 .and_then(|snapshot| {
1357                     vm.send(&snapshot, destination_url)
1358                         .map_err(VmError::SnapshotSend)
1359                 })
1360         } else {
1361             Err(VmError::VmNotRunning)
1362         }
1363     }
1364 
    /// Restores a VM from a snapshot located at `restore_cfg.source_url`.
    ///
    /// Fails with `VmAlreadyCreated` if a VM or VM config already exists.
    /// Order matters here: the saved config is received and validated, net
    /// FDs supplied for the restore are patched in, the snapshot's CPUID is
    /// checked for compatibility (KVM/x86_64 only), console devices are
    /// recreated if missing, the VM is built from the snapshot, landlock is
    /// applied if enabled, and finally `Vm::restore` completes the restore.
    fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
        if self.vm.is_some() || self.vm_config.is_some() {
            return Err(VmError::VmAlreadyCreated);
        }

        // The source URL must be valid UTF-8 to be usable as a &str.
        let source_url = restore_cfg.source_url.as_path().to_str();
        if source_url.is_none() {
            return Err(VmError::InvalidRestoreSourceUrl);
        }
        // Safe to unwrap as we checked it was Some(&str).
        let source_url = source_url.unwrap();

        let vm_config = Arc::new(Mutex::new(
            recv_vm_config(source_url).map_err(VmError::Restore)?,
        ));
        restore_cfg
            .validate(&vm_config.lock().unwrap().clone())
            .map_err(VmError::ConfigValidation)?;

        // Update VM's net configurations with new fds received for restore operation
        if let (Some(restored_nets), Some(vm_net_configs)) =
            (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
        {
            for net in restored_nets.iter() {
                for net_config in vm_net_configs.iter_mut() {
                    // update only if the net dev is backed by FDs
                    if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
                        net_config.fds.clone_from(&net.fds);
                    }
                }
            }
        }

        let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;

        // Refuse to restore on a host whose CPU cannot provide the features
        // the source VM was using.
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
            .map_err(VmError::Restore)?;

        self.vm_config = Some(Arc::clone(&vm_config));

        // console_info is set to None in vm_snapshot. re-populate here if empty
        if self.console_info.is_none() {
            self.console_info =
                Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
        }

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        let vm = Vm::new(
            vm_config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.as_ref().map(Arc::clone),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
            Some(source_url),
            Some(restore_cfg.prefault),
        )?;
        self.vm = Some(vm);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone())
                .map_err(VmError::ApplyLandlock)?;
        }

        // Now we can restore the rest of the VM.
        if let Some(ref mut vm) = self.vm {
            vm.restore()
        } else {
            Err(VmError::VmNotCreated)
        }
    }
1463 
1464     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1465     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1466         if let Some(ref mut vm) = self.vm {
1467             vm.coredump(destination_url).map_err(VmError::Coredump)
1468         } else {
1469             Err(VmError::VmNotRunning)
1470         }
1471     }
1472 
1473     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1474         let r = if let Some(ref mut vm) = self.vm.take() {
1475             // Drain console_info so that the FDs are not reused
1476             let _ = self.console_info.take();
1477             vm.shutdown()
1478         } else {
1479             Err(VmError::VmNotRunning)
1480         };
1481 
1482         if r.is_ok() {
1483             event!("vm", "shutdown");
1484         }
1485 
1486         r
1487     }
1488 
    /// Reboots the VM by shutting down the current instance and booting a
    /// fresh one with the same config and console resize pipe.
    ///
    /// Ordering is deliberate: shutdown closes console devices (so
    /// `console_info` is dropped), any pending spurious reset event is
    /// drained, consoles are recreated, then the new VM is built and booted.
    fn vm_reboot(&mut self) -> result::Result<(), VmError> {
        event!("vm", "rebooting");

        // First we stop the current VM
        let (config, console_resize_pipe) = if let Some(mut vm) = self.vm.take() {
            let config = vm.get_config();
            let console_resize_pipe = vm.console_resize_pipe();
            vm.shutdown()?;
            (config, console_resize_pipe)
        } else {
            return Err(VmError::VmNotCreated);
        };

        // vm.shutdown() closes all the console devices, so set console_info to None
        // so that the closed FD #s are not reused.
        let _ = self.console_info.take();

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
        // an event sitting in the shared reset_evt. Without doing this we get very early reboots
        // during the boot process.
        if self.reset_evt.read().is_ok() {
            warn!("Spurious second reset event received. Ignoring.");
        }

        self.console_info =
            Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);

        // Then we create the new VM
        let mut vm = Vm::new(
            config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            console_resize_pipe,
            Arc::clone(&self.original_termios_opt),
            None,
            None,
            None,
        )?;

        // And we boot it
        vm.boot()?;

        self.vm = Some(vm);

        event!("vm", "rebooted");

        Ok(())
    }
1555 
1556     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1557         match &self.vm_config {
1558             Some(config) => {
1559                 let state = match &self.vm {
1560                     Some(vm) => vm.get_state()?,
1561                     None => VmState::Created,
1562                 };
1563 
1564                 let config = Arc::clone(config);
1565 
1566                 let mut memory_actual_size = config.lock().unwrap().memory.total_size();
1567                 if let Some(vm) = &self.vm {
1568                     memory_actual_size -= vm.balloon_size();
1569                 }
1570 
1571                 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree());
1572 
1573                 Ok(VmInfoResponse {
1574                     config,
1575                     state,
1576                     memory_actual_size,
1577                     device_tree,
1578                 })
1579             }
1580             None => Err(VmError::VmNotCreated),
1581         }
1582     }
1583 
1584     fn vmm_ping(&self) -> VmmPingResponse {
1585         let VmmVersionInfo {
1586             build_version,
1587             version,
1588         } = self.version.clone();
1589 
1590         VmmPingResponse {
1591             build_version,
1592             version,
1593             pid: std::process::id() as i64,
1594             features: feature_list(),
1595         }
1596     }
1597 
1598     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1599         if self.vm_config.is_none() {
1600             return Ok(());
1601         }
1602 
1603         // If a VM is booted, we first try to shut it down.
1604         if self.vm.is_some() {
1605             self.vm_shutdown()?;
1606         }
1607 
1608         self.vm_config = None;
1609 
1610         event!("vm", "deleted");
1611 
1612         Ok(())
1613     }
1614 
1615     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1616         self.vm_delete()?;
1617         event!("vmm", "shutdown");
1618         Ok(())
1619     }
1620 
1621     fn vm_resize(
1622         &mut self,
1623         desired_vcpus: Option<u8>,
1624         desired_ram: Option<u64>,
1625         desired_balloon: Option<u64>,
1626     ) -> result::Result<(), VmError> {
1627         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1628 
1629         if let Some(ref mut vm) = self.vm {
1630             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1631                 error!("Error when resizing VM: {:?}", e);
1632                 Err(e)
1633             } else {
1634                 Ok(())
1635             }
1636         } else {
1637             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1638             if let Some(desired_vcpus) = desired_vcpus {
1639                 config.cpus.boot_vcpus = desired_vcpus;
1640             }
1641             if let Some(desired_ram) = desired_ram {
1642                 config.memory.size = desired_ram;
1643             }
1644             if let Some(desired_balloon) = desired_balloon {
1645                 if let Some(balloon_config) = &mut config.balloon {
1646                     balloon_config.size = desired_balloon;
1647                 }
1648             }
1649             Ok(())
1650         }
1651     }
1652 
1653     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1654         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1655 
1656         if let Some(ref mut vm) = self.vm {
1657             if let Err(e) = vm.resize_zone(id, desired_ram) {
1658                 error!("Error when resizing VM: {:?}", e);
1659                 Err(e)
1660             } else {
1661                 Ok(())
1662             }
1663         } else {
1664             // Update VmConfig by setting the new desired ram.
1665             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1666 
1667             if let Some(zones) = &mut memory_config.zones {
1668                 for zone in zones.iter_mut() {
1669                     if zone.id == id {
1670                         zone.size = desired_ram;
1671                         return Ok(());
1672                     }
1673                 }
1674             }
1675 
1676             error!("Could not find the memory zone {} for the resize", id);
1677             Err(VmError::ResizeZone)
1678         }
1679     }
1680 
1681     fn vm_add_device(
1682         &mut self,
1683         device_cfg: DeviceConfig,
1684     ) -> result::Result<Option<Vec<u8>>, VmError> {
1685         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1686 
1687         {
1688             // Validate the configuration change in a cloned configuration
1689             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1690             add_to_config(&mut config.devices, device_cfg.clone());
1691             config.validate().map_err(VmError::ConfigValidation)?;
1692         }
1693 
1694         if let Some(ref mut vm) = self.vm {
1695             let info = vm.add_device(device_cfg).map_err(|e| {
1696                 error!("Error when adding new device to the VM: {:?}", e);
1697                 e
1698             })?;
1699             serde_json::to_vec(&info)
1700                 .map(Some)
1701                 .map_err(VmError::SerializeJson)
1702         } else {
1703             // Update VmConfig by adding the new device.
1704             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1705             add_to_config(&mut config.devices, device_cfg);
1706             Ok(None)
1707         }
1708     }
1709 
1710     fn vm_add_user_device(
1711         &mut self,
1712         device_cfg: UserDeviceConfig,
1713     ) -> result::Result<Option<Vec<u8>>, VmError> {
1714         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1715 
1716         {
1717             // Validate the configuration change in a cloned configuration
1718             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1719             add_to_config(&mut config.user_devices, device_cfg.clone());
1720             config.validate().map_err(VmError::ConfigValidation)?;
1721         }
1722 
1723         if let Some(ref mut vm) = self.vm {
1724             let info = vm.add_user_device(device_cfg).map_err(|e| {
1725                 error!("Error when adding new user device to the VM: {:?}", e);
1726                 e
1727             })?;
1728             serde_json::to_vec(&info)
1729                 .map(Some)
1730                 .map_err(VmError::SerializeJson)
1731         } else {
1732             // Update VmConfig by adding the new device.
1733             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1734             add_to_config(&mut config.user_devices, device_cfg);
1735             Ok(None)
1736         }
1737     }
1738 
1739     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1740         if let Some(ref mut vm) = self.vm {
1741             if let Err(e) = vm.remove_device(id) {
1742                 error!("Error when removing device from the VM: {:?}", e);
1743                 Err(e)
1744             } else {
1745                 Ok(())
1746             }
1747         } else if let Some(ref config) = self.vm_config {
1748             let mut config = config.lock().unwrap();
1749             if config.remove_device(&id) {
1750                 Ok(())
1751             } else {
1752                 Err(VmError::NoDeviceToRemove(id))
1753             }
1754         } else {
1755             Err(VmError::VmNotCreated)
1756         }
1757     }
1758 
1759     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1760         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1761 
1762         {
1763             // Validate the configuration change in a cloned configuration
1764             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1765             add_to_config(&mut config.disks, disk_cfg.clone());
1766             config.validate().map_err(VmError::ConfigValidation)?;
1767         }
1768 
1769         if let Some(ref mut vm) = self.vm {
1770             let info = vm.add_disk(disk_cfg).map_err(|e| {
1771                 error!("Error when adding new disk to the VM: {:?}", e);
1772                 e
1773             })?;
1774             serde_json::to_vec(&info)
1775                 .map(Some)
1776                 .map_err(VmError::SerializeJson)
1777         } else {
1778             // Update VmConfig by adding the new device.
1779             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1780             add_to_config(&mut config.disks, disk_cfg);
1781             Ok(None)
1782         }
1783     }
1784 
1785     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1786         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1787 
1788         {
1789             // Validate the configuration change in a cloned configuration
1790             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1791             add_to_config(&mut config.fs, fs_cfg.clone());
1792             config.validate().map_err(VmError::ConfigValidation)?;
1793         }
1794 
1795         if let Some(ref mut vm) = self.vm {
1796             let info = vm.add_fs(fs_cfg).map_err(|e| {
1797                 error!("Error when adding new fs to the VM: {:?}", e);
1798                 e
1799             })?;
1800             serde_json::to_vec(&info)
1801                 .map(Some)
1802                 .map_err(VmError::SerializeJson)
1803         } else {
1804             // Update VmConfig by adding the new device.
1805             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1806             add_to_config(&mut config.fs, fs_cfg);
1807             Ok(None)
1808         }
1809     }
1810 
1811     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1812         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1813 
1814         {
1815             // Validate the configuration change in a cloned configuration
1816             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1817             add_to_config(&mut config.pmem, pmem_cfg.clone());
1818             config.validate().map_err(VmError::ConfigValidation)?;
1819         }
1820 
1821         if let Some(ref mut vm) = self.vm {
1822             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1823                 error!("Error when adding new pmem device to the VM: {:?}", e);
1824                 e
1825             })?;
1826             serde_json::to_vec(&info)
1827                 .map(Some)
1828                 .map_err(VmError::SerializeJson)
1829         } else {
1830             // Update VmConfig by adding the new device.
1831             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1832             add_to_config(&mut config.pmem, pmem_cfg);
1833             Ok(None)
1834         }
1835     }
1836 
1837     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1838         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1839 
1840         {
1841             // Validate the configuration change in a cloned configuration
1842             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1843             add_to_config(&mut config.net, net_cfg.clone());
1844             config.validate().map_err(VmError::ConfigValidation)?;
1845         }
1846 
1847         if let Some(ref mut vm) = self.vm {
1848             let info = vm.add_net(net_cfg).map_err(|e| {
1849                 error!("Error when adding new network device to the VM: {:?}", e);
1850                 e
1851             })?;
1852             serde_json::to_vec(&info)
1853                 .map(Some)
1854                 .map_err(VmError::SerializeJson)
1855         } else {
1856             // Update VmConfig by adding the new device.
1857             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1858             add_to_config(&mut config.net, net_cfg);
1859             Ok(None)
1860         }
1861     }
1862 
1863     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1864         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1865 
1866         {
1867             // Validate the configuration change in a cloned configuration
1868             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1869             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1870             config.validate().map_err(VmError::ConfigValidation)?;
1871         }
1872 
1873         if let Some(ref mut vm) = self.vm {
1874             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1875                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1876                 e
1877             })?;
1878             serde_json::to_vec(&info)
1879                 .map(Some)
1880                 .map_err(VmError::SerializeJson)
1881         } else {
1882             // Update VmConfig by adding the new device.
1883             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1884             add_to_config(&mut config.vdpa, vdpa_cfg);
1885             Ok(None)
1886         }
1887     }
1888 
1889     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1890         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1891 
1892         {
1893             // Validate the configuration change in a cloned configuration
1894             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1895 
1896             if config.vsock.is_some() {
1897                 return Err(VmError::TooManyVsockDevices);
1898             }
1899 
1900             config.vsock = Some(vsock_cfg.clone());
1901             config.validate().map_err(VmError::ConfigValidation)?;
1902         }
1903 
1904         if let Some(ref mut vm) = self.vm {
1905             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1906                 error!("Error when adding new vsock device to the VM: {:?}", e);
1907                 e
1908             })?;
1909             serde_json::to_vec(&info)
1910                 .map(Some)
1911                 .map_err(VmError::SerializeJson)
1912         } else {
1913             // Update VmConfig by adding the new device.
1914             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1915             config.vsock = Some(vsock_cfg);
1916             Ok(None)
1917         }
1918     }
1919 
1920     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1921         if let Some(ref mut vm) = self.vm {
1922             let info = vm.counters().map_err(|e| {
1923                 error!("Error when getting counters from the VM: {:?}", e);
1924                 e
1925             })?;
1926             serde_json::to_vec(&info)
1927                 .map(Some)
1928                 .map_err(VmError::SerializeJson)
1929         } else {
1930             Err(VmError::VmNotRunning)
1931         }
1932     }
1933 
1934     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1935         if let Some(ref mut vm) = self.vm {
1936             vm.power_button()
1937         } else {
1938             Err(VmError::VmNotRunning)
1939         }
1940     }
1941 
1942     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1943         if let Some(ref mut vm) = self.vm {
1944             vm.nmi()
1945         } else {
1946             Err(VmError::VmNotRunning)
1947         }
1948     }
1949 
    /// Serve one incoming migration on the UNIX socket named by
    /// `receive_data_migration.receiver_url`, driving the source's command
    /// protocol (Start/Config/State/Memory/MemoryFd/Complete/Abandon) until
    /// the migration completes or is abandoned.
    fn vm_receive_migration(
        &mut self,
        receive_data_migration: VmReceiveMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Receiving migration: receiver_url = {}",
            receive_data_migration.receiver_url
        );

        // Bind, accept a single connection from the source, then remove the
        // socket path from the filesystem.
        let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
        let listener = UnixListener::bind(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
        })?;
        let (mut socket, _addr) = listener.accept().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
        })?;
        std::fs::remove_file(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
        })?;

        // Protocol state: `started` flips on the Start command; the memory
        // manager is produced by Config and consumed by State; memory files
        // received via MemoryFd are collected per slot and handed to Config.
        let mut started = false;
        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
        let mut existing_memory_files = None;
        loop {
            let req = Request::read_from(&mut socket)?;
            match req.command() {
                Command::Invalid => info!("Invalid Command Received"),
                Command::Start => {
                    info!("Start Command Received");
                    started = true;

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Config => {
                    info!("Config Command Received");

                    // Config is only valid after Start; reject it otherwise.
                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    memory_manager = Some(self.vm_receive_config(
                        &req,
                        &mut socket,
                        existing_memory_files.take(),
                    )?);
                }
                Command::State => {
                    info!("State Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    // State consumes the memory manager created by Config.
                    if let Some(mm) = memory_manager.take() {
                        self.vm_receive_state(&req, &mut socket, mm)?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::Memory => {
                    info!("Memory Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    // Memory only borrows the manager: several Memory
                    // commands may arrive for one migration.
                    if let Some(mm) = memory_manager.as_ref() {
                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::MemoryFd => {
                    info!("MemoryFd Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }

                    // The 4-byte payload carries the little-endian memory
                    // slot number; the backing file arrives as an fd passed
                    // over the socket alongside it.
                    let mut buf = [0u8; 4];
                    let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
                        MigratableError::MigrateReceive(anyhow!(
                            "Error receiving slot from socket: {}",
                            e
                        ))
                    })?;

                    if existing_memory_files.is_none() {
                        existing_memory_files = Some(HashMap::default())
                    }

                    if let Some(ref mut existing_memory_files) = existing_memory_files {
                        let slot = u32::from_le_bytes(buf);
                        existing_memory_files.insert(slot, file.unwrap());
                    }

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Complete => {
                    info!("Complete Command Received");
                    // Resume the restored VM on this (destination) side.
                    if let Some(ref mut vm) = self.vm.as_mut() {
                        vm.resume()?;
                        Response::ok().write_to(&mut socket)?;
                    } else {
                        warn!("VM not created yet");
                        Response::error().write_to(&mut socket)?;
                    }
                    break;
                }
                Command::Abandon => {
                    info!("Abandon Command Received");
                    // Drop any partially received VM and config; the ack is
                    // best-effort since the source may already be gone.
                    self.vm = None;
                    self.vm_config = None;
                    Response::ok().write_to(&mut socket).ok();
                    break;
                }
            }
        }

        Ok(())
    }
2078 
2079     fn vm_send_migration(
2080         &mut self,
2081         send_data_migration: VmSendMigrationData,
2082     ) -> result::Result<(), MigratableError> {
2083         info!(
2084             "Sending migration: destination_url = {}, local = {}",
2085             send_data_migration.destination_url, send_data_migration.local
2086         );
2087 
2088         if !self
2089             .vm_config
2090             .as_ref()
2091             .unwrap()
2092             .lock()
2093             .unwrap()
2094             .backed_by_shared_memory()
2095             && send_data_migration.local
2096         {
2097             return Err(MigratableError::MigrateSend(anyhow!(
2098                 "Local migration requires shared memory or hugepages enabled"
2099             )));
2100         }
2101 
2102         if let Some(vm) = self.vm.as_mut() {
2103             Self::send_migration(
2104                 vm,
2105                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2106                 self.hypervisor.clone(),
2107                 send_data_migration,
2108             )
2109             .map_err(|migration_err| {
2110                 error!("Migration failed: {:?}", migration_err);
2111 
2112                 // Stop logging dirty pages
2113                 if let Err(e) = vm.stop_dirty_log() {
2114                     return e;
2115                 }
2116 
2117                 if vm.get_state().unwrap() == VmState::Paused {
2118                     if let Err(e) = vm.resume() {
2119                         return e;
2120                     }
2121                 }
2122 
2123                 migration_err
2124             })?;
2125 
2126             // Shutdown the VM after the migration succeeded
2127             self.exit_evt.write(1).map_err(|e| {
2128                 MigratableError::MigrateSend(anyhow!(
2129                     "Failed shutting down the VM after migration: {:?}",
2130                     e
2131                 ))
2132             })
2133         } else {
2134             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2135         }
2136     }
2137 }
2138 
// Identifiers naming each manager's section within a VM snapshot.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2142 
2143 #[cfg(test)]
2144 mod unit_tests {
2145     use super::*;
2146     #[cfg(target_arch = "x86_64")]
2147     use crate::config::DebugConsoleConfig;
2148     use config::{
2149         ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig,
2150         RngConfig,
2151     };
2152 
    /// Build a minimal `Vmm` for unit tests: dummy version info, fresh
    /// non-blocking event fds, a permissive seccomp action and the default
    /// hypervisor.
    fn create_dummy_vmm() -> Vmm {
        Vmm::new(
            VmmVersionInfo::new("dummy", "dummy"),
            EventFd::new(EFD_NONBLOCK).unwrap(),
            #[cfg(feature = "guest_debug")]
            EventFd::new(EFD_NONBLOCK).unwrap(),
            #[cfg(feature = "guest_debug")]
            EventFd::new(EFD_NONBLOCK).unwrap(),
            SeccompAction::Allow,
            hypervisor::new().unwrap(),
            EventFd::new(EFD_NONBLOCK).unwrap(),
        )
        .unwrap()
    }
2167 
    /// Build a minimal `VmConfig` for unit tests: one vCPU, 512 MiB of
    /// shared memory, a kernel payload path, and every optional device list
    /// left empty so cold-plug tests start from a clean slate.
    fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> {
        Arc::new(Mutex::new(VmConfig {
            cpus: CpusConfig {
                boot_vcpus: 1,
                max_vcpus: 1,
                topology: None,
                kvm_hyperv: false,
                max_phys_bits: 46,
                affinity: None,
                features: config::CpuFeatures::default(),
            },
            memory: MemoryConfig {
                // 512 MiB of shared memory.
                size: 536_870_912,
                mergeable: false,
                hotplug_method: HotplugMethod::Acpi,
                hotplug_size: None,
                hotplugged_size: None,
                shared: true,
                hugepages: false,
                hugepage_size: None,
                prefault: false,
                zones: None,
                thp: true,
            },
            payload: Some(PayloadConfig {
                kernel: Some(PathBuf::from("/path/to/kernel")),
                firmware: None,
                cmdline: None,
                initramfs: None,
                #[cfg(feature = "igvm")]
                igvm: None,
                #[cfg(feature = "sev_snp")]
                host_data: None,
            }),
            rate_limit_groups: None,
            disks: None,
            net: None,
            rng: RngConfig {
                src: PathBuf::from("/dev/urandom"),
                iommu: false,
            },
            balloon: None,
            fs: None,
            pmem: None,
            serial: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Null,
                iommu: false,
                socket: None,
            },
            console: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Tty,
                iommu: false,
                socket: None,
            },
            #[cfg(target_arch = "x86_64")]
            debug_console: DebugConsoleConfig::default(),
            devices: None,
            user_devices: None,
            vdpa: None,
            vsock: None,
            #[cfg(feature = "pvmemcontrol")]
            pvmemcontrol: None,
            pvpanic: false,
            iommu: false,
            #[cfg(target_arch = "x86_64")]
            sgx_epc: None,
            numa: None,
            watchdog: false,
            #[cfg(feature = "guest_debug")]
            gdb: false,
            pci_segments: None,
            platform: None,
            tpm: None,
            preserved_fds: None,
            landlock_enable: false,
            landlock_rules: None,
        }))
    }
2248 
2249     #[test]
2250     fn test_vmm_vm_create() {
2251         let mut vmm = create_dummy_vmm();
2252         let config = create_dummy_vm_config();
2253 
2254         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2255         assert!(matches!(
2256             vmm.vm_create(config),
2257             Err(VmError::VmAlreadyCreated)
2258         ));
2259     }
2260 
2261     #[test]
2262     fn test_vmm_vm_cold_add_device() {
2263         let mut vmm = create_dummy_vmm();
2264         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2265 
2266         assert!(matches!(
2267             vmm.vm_add_device(device_config.clone()),
2268             Err(VmError::VmNotCreated)
2269         ));
2270 
2271         let _ = vmm.vm_create(create_dummy_vm_config());
2272         assert!(vmm
2273             .vm_config
2274             .as_ref()
2275             .unwrap()
2276             .lock()
2277             .unwrap()
2278             .devices
2279             .is_none());
2280 
2281         let result = vmm.vm_add_device(device_config.clone());
2282         assert!(result.is_ok());
2283         assert!(result.unwrap().is_none());
2284         assert_eq!(
2285             vmm.vm_config
2286                 .as_ref()
2287                 .unwrap()
2288                 .lock()
2289                 .unwrap()
2290                 .devices
2291                 .clone()
2292                 .unwrap()
2293                 .len(),
2294             1
2295         );
2296         assert_eq!(
2297             vmm.vm_config
2298                 .as_ref()
2299                 .unwrap()
2300                 .lock()
2301                 .unwrap()
2302                 .devices
2303                 .clone()
2304                 .unwrap()[0],
2305             device_config
2306         );
2307     }
2308 
2309     #[test]
2310     fn test_vmm_vm_cold_add_user_device() {
2311         let mut vmm = create_dummy_vmm();
2312         let user_device_config =
2313             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2314 
2315         assert!(matches!(
2316             vmm.vm_add_user_device(user_device_config.clone()),
2317             Err(VmError::VmNotCreated)
2318         ));
2319 
2320         let _ = vmm.vm_create(create_dummy_vm_config());
2321         assert!(vmm
2322             .vm_config
2323             .as_ref()
2324             .unwrap()
2325             .lock()
2326             .unwrap()
2327             .user_devices
2328             .is_none());
2329 
2330         let result = vmm.vm_add_user_device(user_device_config.clone());
2331         assert!(result.is_ok());
2332         assert!(result.unwrap().is_none());
2333         assert_eq!(
2334             vmm.vm_config
2335                 .as_ref()
2336                 .unwrap()
2337                 .lock()
2338                 .unwrap()
2339                 .user_devices
2340                 .clone()
2341                 .unwrap()
2342                 .len(),
2343             1
2344         );
2345         assert_eq!(
2346             vmm.vm_config
2347                 .as_ref()
2348                 .unwrap()
2349                 .lock()
2350                 .unwrap()
2351                 .user_devices
2352                 .clone()
2353                 .unwrap()[0],
2354             user_device_config
2355         );
2356     }
2357 
2358     #[test]
2359     fn test_vmm_vm_cold_add_disk() {
2360         let mut vmm = create_dummy_vmm();
2361         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2362 
2363         assert!(matches!(
2364             vmm.vm_add_disk(disk_config.clone()),
2365             Err(VmError::VmNotCreated)
2366         ));
2367 
2368         let _ = vmm.vm_create(create_dummy_vm_config());
2369         assert!(vmm
2370             .vm_config
2371             .as_ref()
2372             .unwrap()
2373             .lock()
2374             .unwrap()
2375             .disks
2376             .is_none());
2377 
2378         let result = vmm.vm_add_disk(disk_config.clone());
2379         assert!(result.is_ok());
2380         assert!(result.unwrap().is_none());
2381         assert_eq!(
2382             vmm.vm_config
2383                 .as_ref()
2384                 .unwrap()
2385                 .lock()
2386                 .unwrap()
2387                 .disks
2388                 .clone()
2389                 .unwrap()
2390                 .len(),
2391             1
2392         );
2393         assert_eq!(
2394             vmm.vm_config
2395                 .as_ref()
2396                 .unwrap()
2397                 .lock()
2398                 .unwrap()
2399                 .disks
2400                 .clone()
2401                 .unwrap()[0],
2402             disk_config
2403         );
2404     }
2405 
2406     #[test]
2407     fn test_vmm_vm_cold_add_fs() {
2408         let mut vmm = create_dummy_vmm();
2409         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2410 
2411         assert!(matches!(
2412             vmm.vm_add_fs(fs_config.clone()),
2413             Err(VmError::VmNotCreated)
2414         ));
2415 
2416         let _ = vmm.vm_create(create_dummy_vm_config());
2417         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2418 
2419         let result = vmm.vm_add_fs(fs_config.clone());
2420         assert!(result.is_ok());
2421         assert!(result.unwrap().is_none());
2422         assert_eq!(
2423             vmm.vm_config
2424                 .as_ref()
2425                 .unwrap()
2426                 .lock()
2427                 .unwrap()
2428                 .fs
2429                 .clone()
2430                 .unwrap()
2431                 .len(),
2432             1
2433         );
2434         assert_eq!(
2435             vmm.vm_config
2436                 .as_ref()
2437                 .unwrap()
2438                 .lock()
2439                 .unwrap()
2440                 .fs
2441                 .clone()
2442                 .unwrap()[0],
2443             fs_config
2444         );
2445     }
2446 
2447     #[test]
2448     fn test_vmm_vm_cold_add_pmem() {
2449         let mut vmm = create_dummy_vmm();
2450         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2451 
2452         assert!(matches!(
2453             vmm.vm_add_pmem(pmem_config.clone()),
2454             Err(VmError::VmNotCreated)
2455         ));
2456 
2457         let _ = vmm.vm_create(create_dummy_vm_config());
2458         assert!(vmm
2459             .vm_config
2460             .as_ref()
2461             .unwrap()
2462             .lock()
2463             .unwrap()
2464             .pmem
2465             .is_none());
2466 
2467         let result = vmm.vm_add_pmem(pmem_config.clone());
2468         assert!(result.is_ok());
2469         assert!(result.unwrap().is_none());
2470         assert_eq!(
2471             vmm.vm_config
2472                 .as_ref()
2473                 .unwrap()
2474                 .lock()
2475                 .unwrap()
2476                 .pmem
2477                 .clone()
2478                 .unwrap()
2479                 .len(),
2480             1
2481         );
2482         assert_eq!(
2483             vmm.vm_config
2484                 .as_ref()
2485                 .unwrap()
2486                 .lock()
2487                 .unwrap()
2488                 .pmem
2489                 .clone()
2490                 .unwrap()[0],
2491             pmem_config
2492         );
2493     }
2494 
2495     #[test]
2496     fn test_vmm_vm_cold_add_net() {
2497         let mut vmm = create_dummy_vmm();
2498         let net_config = NetConfig::parse(
2499             "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
2500         )
2501         .unwrap();
2502 
2503         assert!(matches!(
2504             vmm.vm_add_net(net_config.clone()),
2505             Err(VmError::VmNotCreated)
2506         ));
2507 
2508         let _ = vmm.vm_create(create_dummy_vm_config());
2509         assert!(vmm
2510             .vm_config
2511             .as_ref()
2512             .unwrap()
2513             .lock()
2514             .unwrap()
2515             .net
2516             .is_none());
2517 
2518         let result = vmm.vm_add_net(net_config.clone());
2519         assert!(result.is_ok());
2520         assert!(result.unwrap().is_none());
2521         assert_eq!(
2522             vmm.vm_config
2523                 .as_ref()
2524                 .unwrap()
2525                 .lock()
2526                 .unwrap()
2527                 .net
2528                 .clone()
2529                 .unwrap()
2530                 .len(),
2531             1
2532         );
2533         assert_eq!(
2534             vmm.vm_config
2535                 .as_ref()
2536                 .unwrap()
2537                 .lock()
2538                 .unwrap()
2539                 .net
2540                 .clone()
2541                 .unwrap()[0],
2542             net_config
2543         );
2544     }
2545 
2546     #[test]
2547     fn test_vmm_vm_cold_add_vdpa() {
2548         let mut vmm = create_dummy_vmm();
2549         let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();
2550 
2551         assert!(matches!(
2552             vmm.vm_add_vdpa(vdpa_config.clone()),
2553             Err(VmError::VmNotCreated)
2554         ));
2555 
2556         let _ = vmm.vm_create(create_dummy_vm_config());
2557         assert!(vmm
2558             .vm_config
2559             .as_ref()
2560             .unwrap()
2561             .lock()
2562             .unwrap()
2563             .vdpa
2564             .is_none());
2565 
2566         let result = vmm.vm_add_vdpa(vdpa_config.clone());
2567         assert!(result.is_ok());
2568         assert!(result.unwrap().is_none());
2569         assert_eq!(
2570             vmm.vm_config
2571                 .as_ref()
2572                 .unwrap()
2573                 .lock()
2574                 .unwrap()
2575                 .vdpa
2576                 .clone()
2577                 .unwrap()
2578                 .len(),
2579             1
2580         );
2581         assert_eq!(
2582             vmm.vm_config
2583                 .as_ref()
2584                 .unwrap()
2585                 .lock()
2586                 .unwrap()
2587                 .vdpa
2588                 .clone()
2589                 .unwrap()[0],
2590             vdpa_config
2591         );
2592     }
2593 
2594     #[test]
2595     fn test_vmm_vm_cold_add_vsock() {
2596         let mut vmm = create_dummy_vmm();
2597         let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();
2598 
2599         assert!(matches!(
2600             vmm.vm_add_vsock(vsock_config.clone()),
2601             Err(VmError::VmNotCreated)
2602         ));
2603 
2604         let _ = vmm.vm_create(create_dummy_vm_config());
2605         assert!(vmm
2606             .vm_config
2607             .as_ref()
2608             .unwrap()
2609             .lock()
2610             .unwrap()
2611             .vsock
2612             .is_none());
2613 
2614         let result = vmm.vm_add_vsock(vsock_config.clone());
2615         assert!(result.is_ok());
2616         assert!(result.unwrap().is_none());
2617         assert_eq!(
2618             vmm.vm_config
2619                 .as_ref()
2620                 .unwrap()
2621                 .lock()
2622                 .unwrap()
2623                 .vsock
2624                 .clone()
2625                 .unwrap(),
2626             vsock_config
2627         );
2628     }
2629 }
2630