xref: /cloud-hypervisor/vmm/src/lib.rs (revision fee769bed4c58a07b67e25a7339cfd397f701f3a)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use crate::api::{
12     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
13     VmSendMigrationData, VmmPingResponse,
14 };
15 use crate::config::{
16     add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig,
17     UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
18 };
19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
20 use crate::coredump::GuestDebuggable;
21 use crate::landlock::Landlock;
22 use crate::memory_manager::MemoryManager;
23 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
24 use crate::migration::get_vm_snapshot;
25 use crate::migration::{recv_vm_config, recv_vm_state};
26 use crate::seccomp_filters::{get_seccomp_filter, Thread};
27 use crate::vm::{Error as VmError, Vm, VmState};
28 use anyhow::anyhow;
29 #[cfg(feature = "dbus_api")]
30 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
31 use api::http::HttpApiHandle;
32 use console_devices::{pre_create_console_devices, ConsoleInfo};
33 use landlock::LandlockError;
34 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
35 use memory_manager::MemoryManagerSnapshotData;
36 use pci::PciBdf;
37 use seccompiler::{apply_filter, SeccompAction};
38 use serde::ser::{SerializeStruct, Serializer};
39 use serde::{Deserialize, Serialize};
40 use signal_hook::iterator::{Handle, Signals};
41 use std::collections::HashMap;
42 use std::fs::File;
43 use std::io;
44 use std::io::{stdout, Read, Write};
45 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
46 use std::os::unix::net::UnixListener;
47 use std::os::unix::net::UnixStream;
48 use std::panic::AssertUnwindSafe;
49 use std::path::PathBuf;
50 use std::rc::Rc;
51 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
52 use std::sync::{Arc, Mutex};
53 use std::time::Instant;
54 use std::{result, thread};
55 use thiserror::Error;
56 use tracer::trace_scoped;
57 use vm_memory::bitmap::AtomicBitmap;
58 use vm_memory::{ReadVolatile, WriteVolatile};
59 use vm_migration::{protocol::*, Migratable};
60 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
61 use vmm_sys_util::eventfd::EventFd;
62 use vmm_sys_util::signal::unblock_signal;
63 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
64 
65 mod acpi;
66 pub mod api;
67 mod clone3;
68 pub mod config;
69 pub mod console_devices;
70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
71 mod coredump;
72 pub mod cpu;
73 pub mod device_manager;
74 pub mod device_tree;
75 #[cfg(feature = "guest_debug")]
76 mod gdb;
77 #[cfg(feature = "igvm")]
78 mod igvm;
79 pub mod interrupt;
80 pub mod landlock;
81 pub mod memory_manager;
82 pub mod migration;
83 mod pci_segment;
84 pub mod seccomp_filters;
85 mod serial_manager;
86 mod sigwinch_listener;
87 pub mod vm;
88 pub mod vm_config;
89 
90 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
91 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
92 
93 /// Errors associated with VMM management
94 #[derive(Debug, Error)]
95 pub enum Error {
96     /// API request receive error
97     #[error("Error receiving API request: {0}")]
98     ApiRequestRecv(#[source] RecvError),
99 
100     /// API response send error
101     #[error("Error sending API request: {0}")]
102     ApiResponseSend(#[source] SendError<ApiResponse>),
103 
104     /// Cannot bind to the UNIX domain socket path
105     #[error("Error binding to UNIX domain socket: {0}")]
106     Bind(#[source] io::Error),
107 
108     /// Cannot clone EventFd.
109     #[error("Error cloning EventFd: {0}")]
110     EventFdClone(#[source] io::Error),
111 
112     /// Cannot create EventFd.
113     #[error("Error creating EventFd: {0}")]
114     EventFdCreate(#[source] io::Error),
115 
116     /// Cannot read from EventFd.
117     #[error("Error reading from EventFd: {0}")]
118     EventFdRead(#[source] io::Error),
119 
120     /// Cannot create epoll context.
121     #[error("Error creating epoll context: {0}")]
122     Epoll(#[source] io::Error),
123 
124     /// Cannot create HTTP thread
125     #[error("Error spawning HTTP thread: {0}")]
126     HttpThreadSpawn(#[source] io::Error),
127 
128     /// Cannot create D-Bus thread
129     #[cfg(feature = "dbus_api")]
130     #[error("Error spawning D-Bus thread: {0}")]
131     DBusThreadSpawn(#[source] io::Error),
132 
133     /// Cannot start D-Bus session
134     #[cfg(feature = "dbus_api")]
135     #[error("Error starting D-Bus session: {0}")]
136     CreateDBusSession(#[source] zbus::Error),
137 
138     /// Cannot create `event-monitor` thread
139     #[error("Error spawning `event-monitor` thread: {0}")]
140     EventMonitorThreadSpawn(#[source] io::Error),
141 
142     /// Cannot handle the VM STDIN stream
143     #[error("Error handling VM stdin: {0:?}")]
144     Stdin(VmError),
145 
146     /// Cannot handle the VM pty stream
147     #[error("Error handling VM pty: {0:?}")]
148     Pty(VmError),
149 
150     /// Cannot reboot the VM
151     #[error("Error rebooting VM: {0:?}")]
152     VmReboot(VmError),
153 
154     /// Cannot create VMM thread
155     #[error("Error spawning VMM thread {0:?}")]
156     VmmThreadSpawn(#[source] io::Error),
157 
158     /// Cannot shut the VMM down
159     #[error("Error shutting down VMM: {0:?}")]
160     VmmShutdown(VmError),
161 
162     /// Cannot create seccomp filter
163     #[error("Error creating seccomp filter: {0}")]
164     CreateSeccompFilter(seccompiler::Error),
165 
166     /// Cannot apply seccomp filter
167     #[error("Error applying seccomp filter: {0}")]
168     ApplySeccompFilter(seccompiler::Error),
169 
170     /// Error activating virtio devices
171     #[error("Error activating virtio devices: {0:?}")]
172     ActivateVirtioDevices(VmError),
173 
174     /// Error creating API server
175     #[error("Error creating API server {0:?}")]
176     CreateApiServer(micro_http::ServerError),
177 
178     /// Error binding API server socket
179     #[error("Error creation API server's socket {0:?}")]
180     CreateApiServerSocket(#[source] io::Error),
181 
182     #[cfg(feature = "guest_debug")]
183     #[error("Failed to start the GDB thread: {0}")]
184     GdbThreadSpawn(io::Error),
185 
186     /// GDB request receive error
187     #[cfg(feature = "guest_debug")]
188     #[error("Error receiving GDB request: {0}")]
189     GdbRequestRecv(#[source] RecvError),
190 
191     /// GDB response send error
192     #[cfg(feature = "guest_debug")]
193     #[error("Error sending GDB request: {0}")]
194     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
195 
196     #[error("Cannot spawn a signal handler thread: {0}")]
197     SignalHandlerSpawn(#[source] io::Error),
198 
199     #[error("Failed to join on threads: {0:?}")]
200     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
201 
202     /// Cannot create Landlock object
203     #[error("Error creating landlock object: {0}")]
204     CreateLandlock(LandlockError),
205 
206     /// Cannot apply landlock based sandboxing
207     #[error("Error applying landlock: {0}")]
208     ApplyLandlock(LandlockError),
209 }
210 pub type Result<T> = result::Result<T, Error>;
211 
/// Tokens stored in the epoll event data so the control loop can tell which
/// registered file descriptor became readable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    /// Fallback used for any value that does not map to a known token.
    Unknown,
}

impl From<u64> for EpollDispatch {
    /// Maps a raw epoll token back to its variant; every value that is not the
    /// discriminant of a named variant becomes [`EpollDispatch::Unknown`].
    fn from(v: u64) -> Self {
        let known = [
            Self::Exit,
            Self::Reset,
            Self::Api,
            Self::ActivateVirtioDevices,
            Self::Debug,
        ];

        known
            .iter()
            .copied()
            .find(|&candidate| candidate as u64 == v)
            .unwrap_or(Self::Unknown)
    }
}
236 
237 pub struct EpollContext {
238     epoll_file: File,
239 }
240 
241 impl EpollContext {
242     pub fn new() -> result::Result<EpollContext, io::Error> {
243         let epoll_fd = epoll::create(true)?;
244         // Use 'File' to enforce closing on 'epoll_fd'
245         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
246         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
247 
248         Ok(EpollContext { epoll_file })
249     }
250 
251     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
252     where
253         T: AsRawFd,
254     {
255         let dispatch_index = token as u64;
256         epoll::ctl(
257             self.epoll_file.as_raw_fd(),
258             epoll::ControlOptions::EPOLL_CTL_ADD,
259             fd.as_raw_fd(),
260             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
261         )?;
262 
263         Ok(())
264     }
265 
266     #[cfg(fuzzing)]
267     pub fn add_event_custom<T>(
268         &mut self,
269         fd: &T,
270         id: u64,
271         evts: epoll::Events,
272     ) -> result::Result<(), io::Error>
273     where
274         T: AsRawFd,
275     {
276         epoll::ctl(
277             self.epoll_file.as_raw_fd(),
278             epoll::ControlOptions::EPOLL_CTL_ADD,
279             fd.as_raw_fd(),
280             epoll::Event::new(evts, id),
281         )?;
282 
283         Ok(())
284     }
285 }
286 
287 impl AsRawFd for EpollContext {
288     fn as_raw_fd(&self) -> RawFd {
289         self.epoll_file.as_raw_fd()
290     }
291 }
292 
293 pub struct PciDeviceInfo {
294     pub id: String,
295     pub bdf: PciBdf,
296 }
297 
298 impl Serialize for PciDeviceInfo {
299     fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
300     where
301         S: Serializer,
302     {
303         let bdf_str = self.bdf.to_string();
304 
305         // Serialize the structure.
306         let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
307         state.serialize_field("id", &self.id)?;
308         state.serialize_field("bdf", &bdf_str)?;
309         state.end()
310     }
311 }
312 
/// Returns the names of the Cargo features that were enabled when this crate
/// was compiled, in the fixed order listed below.
pub fn feature_list() -> Vec<String> {
    // Each entry is only present when the matching feature is enabled.
    let enabled: &[&str] = &[
        #[cfg(feature = "dbus_api")]
        "dbus_api",
        #[cfg(feature = "dhat-heap")]
        "dhat-heap",
        #[cfg(feature = "guest_debug")]
        "guest_debug",
        #[cfg(feature = "igvm")]
        "igvm",
        #[cfg(feature = "io_uring")]
        "io_uring",
        #[cfg(feature = "kvm")]
        "kvm",
        #[cfg(feature = "mshv")]
        "mshv",
        #[cfg(feature = "sev_snp")]
        "sev_snp",
        #[cfg(feature = "tdx")]
        "tdx",
        #[cfg(feature = "tracing")]
        "tracing",
    ];

    enabled.iter().map(|name| name.to_string()).collect()
}
337 
338 pub fn start_event_monitor_thread(
339     mut monitor: event_monitor::Monitor,
340     seccomp_action: &SeccompAction,
341     landlock_enable: bool,
342     hypervisor_type: hypervisor::HypervisorType,
343     exit_event: EventFd,
344 ) -> Result<thread::JoinHandle<Result<()>>> {
345     // Retrieve seccomp filter
346     let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
347         .map_err(Error::CreateSeccompFilter)?;
348 
349     thread::Builder::new()
350         .name("event-monitor".to_owned())
351         .spawn(move || {
352             // Apply seccomp filter
353             if !seccomp_filter.is_empty() {
354                 apply_filter(&seccomp_filter)
355                     .map_err(Error::ApplySeccompFilter)
356                     .map_err(|e| {
357                         error!("Error applying seccomp filter: {:?}", e);
358                         exit_event.write(1).ok();
359                         e
360                     })?;
361             }
362             if landlock_enable {
363                 Landlock::new()
364                     .map_err(Error::CreateLandlock)?
365                     .restrict_self()
366                     .map_err(Error::ApplyLandlock)
367                     .map_err(|e| {
368                         error!("Error applying landlock to event monitor thread: {:?}", e);
369                         exit_event.write(1).ok();
370                         e
371                     })?;
372             }
373 
374             std::panic::catch_unwind(AssertUnwindSafe(move || {
375                 while let Ok(event) = monitor.rx.recv() {
376                     let event = Arc::new(event);
377 
378                     if let Some(ref mut file) = monitor.file {
379                         file.write_all(event.as_bytes().as_ref()).ok();
380                         file.write_all(b"\n\n").ok();
381                     }
382 
383                     for tx in monitor.broadcast.iter() {
384                         tx.send(event.clone()).ok();
385                     }
386                 }
387             }))
388             .map_err(|_| {
389                 error!("`event-monitor` thread panicked");
390                 exit_event.write(1).ok();
391             })
392             .ok();
393 
394             Ok(())
395         })
396         .map_err(Error::EventMonitorThreadSpawn)
397 }
398 
399 #[allow(unused_variables)]
400 #[allow(clippy::too_many_arguments)]
401 pub fn start_vmm_thread(
402     vmm_version: VmmVersionInfo,
403     http_path: &Option<String>,
404     http_fd: Option<RawFd>,
405     #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
406     api_event: EventFd,
407     api_sender: Sender<ApiRequest>,
408     api_receiver: Receiver<ApiRequest>,
409     #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
410     #[cfg(feature = "guest_debug")] debug_event: EventFd,
411     #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
412     exit_event: EventFd,
413     seccomp_action: &SeccompAction,
414     hypervisor: Arc<dyn hypervisor::Hypervisor>,
415     landlock_enable: bool,
416 ) -> Result<VmmThreadHandle> {
417     #[cfg(feature = "guest_debug")]
418     let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
419     #[cfg(feature = "guest_debug")]
420     let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
421     #[cfg(feature = "guest_debug")]
422     let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
423     #[cfg(feature = "guest_debug")]
424     let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;
425 
426     let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
427     let hypervisor_type = hypervisor.hypervisor_type();
428 
429     // Retrieve seccomp filter
430     let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
431         .map_err(Error::CreateSeccompFilter)?;
432 
433     let vmm_seccomp_action = seccomp_action.clone();
434     let thread = {
435         let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
436         thread::Builder::new()
437             .name("vmm".to_string())
438             .spawn(move || {
439                 // Apply seccomp filter for VMM thread.
440                 if !vmm_seccomp_filter.is_empty() {
441                     apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
442                 }
443 
444                 let mut vmm = Vmm::new(
445                     vmm_version,
446                     api_event,
447                     #[cfg(feature = "guest_debug")]
448                     debug_event,
449                     #[cfg(feature = "guest_debug")]
450                     vm_debug_event,
451                     vmm_seccomp_action,
452                     hypervisor,
453                     exit_event,
454                 )?;
455 
456                 vmm.setup_signal_handler(landlock_enable)?;
457 
458                 vmm.control_loop(
459                     Rc::new(api_receiver),
460                     #[cfg(feature = "guest_debug")]
461                     Rc::new(gdb_receiver),
462                 )
463             })
464             .map_err(Error::VmmThreadSpawn)?
465     };
466 
467     // The VMM thread is started, we can start the dbus thread
468     // and start serving HTTP requests
469     #[cfg(feature = "dbus_api")]
470     let dbus_shutdown_chs = match dbus_options {
471         Some(opts) => {
472             let (_, chs) = api::start_dbus_thread(
473                 opts,
474                 api_event_clone.try_clone().map_err(Error::EventFdClone)?,
475                 api_sender.clone(),
476                 seccomp_action,
477                 exit_event.try_clone().map_err(Error::EventFdClone)?,
478                 hypervisor_type,
479             )?;
480             Some(chs)
481         }
482         None => None,
483     };
484 
485     let http_api_handle = if let Some(http_path) = http_path {
486         Some(api::start_http_path_thread(
487             http_path,
488             api_event_clone,
489             api_sender,
490             seccomp_action,
491             exit_event,
492             hypervisor_type,
493             landlock_enable,
494         )?)
495     } else if let Some(http_fd) = http_fd {
496         Some(api::start_http_fd_thread(
497             http_fd,
498             api_event_clone,
499             api_sender,
500             seccomp_action,
501             exit_event,
502             hypervisor_type,
503             landlock_enable,
504         )?)
505     } else {
506         None
507     };
508 
509     #[cfg(feature = "guest_debug")]
510     if let Some(debug_path) = debug_path {
511         let target = gdb::GdbStub::new(
512             gdb_sender,
513             gdb_debug_event,
514             gdb_vm_debug_event,
515             gdb_hw_breakpoints,
516         );
517         thread::Builder::new()
518             .name("gdb".to_owned())
519             .spawn(move || gdb::gdb_thread(target, &debug_path))
520             .map_err(Error::GdbThreadSpawn)?;
521     }
522 
523     Ok(VmmThreadHandle {
524         thread_handle: thread,
525         #[cfg(feature = "dbus_api")]
526         dbus_shutdown_chs,
527         http_api_handle,
528     })
529 }
530 
531 #[derive(Clone, Deserialize, Serialize)]
532 struct VmMigrationConfig {
533     vm_config: Arc<Mutex<VmConfig>>,
534     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
535     common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
536     memory_manager_data: MemoryManagerSnapshotData,
537 }
538 
/// Version strings describing the running VMM.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    /// Builds a `VmmVersionInfo` holding owned copies of both strings.
    pub fn new(build_version: &str, version: &str) -> Self {
        let build_version = String::from(build_version);
        let version = String::from(version);

        Self {
            build_version,
            version,
        }
    }
}
553 
554 pub struct VmmThreadHandle {
555     pub thread_handle: thread::JoinHandle<Result<()>>,
556     #[cfg(feature = "dbus_api")]
557     pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
558     pub http_api_handle: Option<HttpApiHandle>,
559 }
560 
561 pub struct Vmm {
562     epoll: EpollContext,
563     exit_evt: EventFd,
564     reset_evt: EventFd,
565     api_evt: EventFd,
566     #[cfg(feature = "guest_debug")]
567     debug_evt: EventFd,
568     #[cfg(feature = "guest_debug")]
569     vm_debug_evt: EventFd,
570     version: VmmVersionInfo,
571     vm: Option<Vm>,
572     vm_config: Option<Arc<Mutex<VmConfig>>>,
573     seccomp_action: SeccompAction,
574     hypervisor: Arc<dyn hypervisor::Hypervisor>,
575     activate_evt: EventFd,
576     signals: Option<Handle>,
577     threads: Vec<thread::JoinHandle<()>>,
578     original_termios_opt: Arc<Mutex<Option<termios>>>,
579     console_resize_pipe: Option<File>,
580     console_info: Option<ConsoleInfo>,
581 }
582 
583 impl Vmm {
584     pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
585 
586     fn signal_handler(
587         mut signals: Signals,
588         original_termios_opt: Arc<Mutex<Option<termios>>>,
589         exit_evt: &EventFd,
590     ) {
591         for sig in &Self::HANDLED_SIGNALS {
592             unblock_signal(*sig).unwrap();
593         }
594 
595         for signal in signals.forever() {
596             match signal {
597                 SIGTERM | SIGINT => {
598                     if exit_evt.write(1).is_err() {
599                         // Resetting the terminal is usually done as the VMM exits
600                         if let Ok(lock) = original_termios_opt.lock() {
601                             if let Some(termios) = *lock {
602                                 // SAFETY: FFI call
603                                 let _ = unsafe {
604                                     tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
605                                 };
606                             }
607                         } else {
608                             warn!("Failed to lock original termios");
609                         }
610 
611                         std::process::exit(1);
612                     }
613                 }
614                 _ => (),
615             }
616         }
617     }
618 
619     fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
620         let signals = Signals::new(Self::HANDLED_SIGNALS);
621         match signals {
622             Ok(signals) => {
623                 self.signals = Some(signals.handle());
624                 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
625                 let original_termios_opt = Arc::clone(&self.original_termios_opt);
626 
627                 let signal_handler_seccomp_filter = get_seccomp_filter(
628                     &self.seccomp_action,
629                     Thread::SignalHandler,
630                     self.hypervisor.hypervisor_type(),
631                 )
632                 .map_err(Error::CreateSeccompFilter)?;
633                 self.threads.push(
634                     thread::Builder::new()
635                         .name("vmm_signal_handler".to_string())
636                         .spawn(move || {
637                             if !signal_handler_seccomp_filter.is_empty() {
638                                 if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
639                                     .map_err(Error::ApplySeccompFilter)
640                                 {
641                                     error!("Error applying seccomp filter: {:?}", e);
642                                     exit_evt.write(1).ok();
643                                     return;
644                                 }
645                             }
646                             if landlock_enable{
647                                 match Landlock::new() {
648                                     Ok(landlock) => {
649                                         let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
650                                             error!("Error applying Landlock to signal handler thread: {:?}", e);
651                                             exit_evt.write(1).ok();
652                                         });
653                                     }
654                                     Err(e) => {
655                                         error!("Error creating Landlock object: {:?}", e);
656                                         exit_evt.write(1).ok();
657                                     }
658                                 };
659                             }
660 
661                             std::panic::catch_unwind(AssertUnwindSafe(|| {
662                                 Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
663                             }))
664                             .map_err(|_| {
665                                 error!("vmm signal_handler thread panicked");
666                                 exit_evt.write(1).ok()
667                             })
668                             .ok();
669                         })
670                         .map_err(Error::SignalHandlerSpawn)?,
671                 );
672             }
673             Err(e) => error!("Signal not found {}", e),
674         }
675         Ok(())
676     }
677 
678     #[allow(clippy::too_many_arguments)]
679     fn new(
680         vmm_version: VmmVersionInfo,
681         api_evt: EventFd,
682         #[cfg(feature = "guest_debug")] debug_evt: EventFd,
683         #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
684         seccomp_action: SeccompAction,
685         hypervisor: Arc<dyn hypervisor::Hypervisor>,
686         exit_evt: EventFd,
687     ) -> Result<Self> {
688         let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
689         let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
690         let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
691 
692         epoll
693             .add_event(&exit_evt, EpollDispatch::Exit)
694             .map_err(Error::Epoll)?;
695 
696         epoll
697             .add_event(&reset_evt, EpollDispatch::Reset)
698             .map_err(Error::Epoll)?;
699 
700         epoll
701             .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
702             .map_err(Error::Epoll)?;
703 
704         epoll
705             .add_event(&api_evt, EpollDispatch::Api)
706             .map_err(Error::Epoll)?;
707 
708         #[cfg(feature = "guest_debug")]
709         epoll
710             .add_event(&debug_evt, EpollDispatch::Debug)
711             .map_err(Error::Epoll)?;
712 
713         Ok(Vmm {
714             epoll,
715             exit_evt,
716             reset_evt,
717             api_evt,
718             #[cfg(feature = "guest_debug")]
719             debug_evt,
720             #[cfg(feature = "guest_debug")]
721             vm_debug_evt,
722             version: vmm_version,
723             vm: None,
724             vm_config: None,
725             seccomp_action,
726             hypervisor,
727             activate_evt,
728             signals: None,
729             threads: vec![],
730             original_termios_opt: Arc::new(Mutex::new(None)),
731             console_resize_pipe: None,
732             console_info: None,
733         })
734     }
735 
736     fn vm_receive_config<T>(
737         &mut self,
738         req: &Request,
739         socket: &mut T,
740         existing_memory_files: Option<HashMap<u32, File>>,
741     ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
742     where
743         T: Read + Write,
744     {
745         // Read in config data along with memory manager data
746         let mut data: Vec<u8> = Vec::new();
747         data.resize_with(req.length() as usize, Default::default);
748         socket
749             .read_exact(&mut data)
750             .map_err(MigratableError::MigrateSocket)?;
751 
752         let vm_migration_config: VmMigrationConfig =
753             serde_json::from_slice(&data).map_err(|e| {
754                 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
755             })?;
756 
757         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
758         self.vm_check_cpuid_compatibility(
759             &vm_migration_config.vm_config,
760             &vm_migration_config.common_cpuid,
761         )?;
762 
763         let config = vm_migration_config.vm_config.clone();
764         self.vm_config = Some(vm_migration_config.vm_config);
765         self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
766             MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
767         })?);
768 
769         if self
770             .vm_config
771             .as_ref()
772             .unwrap()
773             .lock()
774             .unwrap()
775             .landlock_enable
776         {
777             apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
778                 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
779             })?;
780         }
781 
782         let vm = Vm::create_hypervisor_vm(
783             &self.hypervisor,
784             #[cfg(feature = "tdx")]
785             false,
786             #[cfg(feature = "sev_snp")]
787             false,
788         )
789         .map_err(|e| {
790             MigratableError::MigrateReceive(anyhow!(
791                 "Error creating hypervisor VM from snapshot: {:?}",
792                 e
793             ))
794         })?;
795 
796         let phys_bits =
797             vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);
798 
799         let memory_manager = MemoryManager::new(
800             vm,
801             &config.lock().unwrap().memory.clone(),
802             None,
803             phys_bits,
804             #[cfg(feature = "tdx")]
805             false,
806             Some(&vm_migration_config.memory_manager_data),
807             existing_memory_files,
808             #[cfg(target_arch = "x86_64")]
809             None,
810         )
811         .map_err(|e| {
812             MigratableError::MigrateReceive(anyhow!(
813                 "Error creating MemoryManager from snapshot: {:?}",
814                 e
815             ))
816         })?;
817 
818         Response::ok().write_to(socket)?;
819 
820         Ok(memory_manager)
821     }
822 
823     fn vm_receive_state<T>(
824         &mut self,
825         req: &Request,
826         socket: &mut T,
827         mm: Arc<Mutex<MemoryManager>>,
828     ) -> std::result::Result<(), MigratableError>
829     where
830         T: Read + Write,
831     {
832         // Read in state data
833         let mut data: Vec<u8> = Vec::new();
834         data.resize_with(req.length() as usize, Default::default);
835         socket
836             .read_exact(&mut data)
837             .map_err(MigratableError::MigrateSocket)?;
838         let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
839             MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
840         })?;
841 
842         let exit_evt = self.exit_evt.try_clone().map_err(|e| {
843             MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
844         })?;
845         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
846             MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
847         })?;
848         #[cfg(feature = "guest_debug")]
849         let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
850             MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
851         })?;
852         let activate_evt = self.activate_evt.try_clone().map_err(|e| {
853             MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
854         })?;
855 
856         let timestamp = Instant::now();
857         let hypervisor_vm = mm.lock().unwrap().vm.clone();
858         let mut vm = Vm::new_from_memory_manager(
859             self.vm_config.clone().unwrap(),
860             mm,
861             hypervisor_vm,
862             exit_evt,
863             reset_evt,
864             #[cfg(feature = "guest_debug")]
865             debug_evt,
866             &self.seccomp_action,
867             self.hypervisor.clone(),
868             activate_evt,
869             timestamp,
870             self.console_info.clone(),
871             None,
872             Arc::clone(&self.original_termios_opt),
873             Some(snapshot),
874         )
875         .map_err(|e| {
876             MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
877         })?;
878 
879         // Create VM
880         vm.restore().map_err(|e| {
881             Response::error().write_to(socket).ok();
882             MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
883         })?;
884         self.vm = Some(vm);
885 
886         Response::ok().write_to(socket)?;
887 
888         Ok(())
889     }
890 
891     fn vm_receive_memory<T>(
892         &mut self,
893         req: &Request,
894         socket: &mut T,
895         memory_manager: &mut MemoryManager,
896     ) -> std::result::Result<(), MigratableError>
897     where
898         T: Read + ReadVolatile + Write,
899     {
900         // Read table
901         let table = MemoryRangeTable::read_from(socket, req.length())?;
902 
903         // And then read the memory itself
904         memory_manager
905             .receive_memory_regions(&table, socket)
906             .inspect_err(|_| {
907                 Response::error().write_to(socket).ok();
908             })?;
909         Response::ok().write_to(socket)?;
910         Ok(())
911     }
912 
913     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
914         url.strip_prefix("unix:")
915             .ok_or_else(|| {
916                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
917             })
918             .map(|s| s.into())
919     }
920 
921     // Returns true if there were dirty pages to send
922     fn vm_maybe_send_dirty_pages<T>(
923         vm: &mut Vm,
924         socket: &mut T,
925     ) -> result::Result<bool, MigratableError>
926     where
927         T: Read + Write + WriteVolatile,
928     {
929         // Send (dirty) memory table
930         let table = vm.dirty_log()?;
931 
932         // But if there are no regions go straight to pause
933         if table.regions().is_empty() {
934             return Ok(false);
935         }
936 
937         Request::memory(table.length()).write_to(socket).unwrap();
938         table.write_to(socket)?;
939         // And then the memory itself
940         vm.send_memory_regions(&table, socket)?;
941         Response::read_from(socket)?.ok_or_abandon(
942             socket,
943             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
944         )?;
945 
946         Ok(true)
947     }
948 
949     fn send_migration(
950         vm: &mut Vm,
951         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
952             dyn hypervisor::Hypervisor,
953         >,
954         send_data_migration: VmSendMigrationData,
955     ) -> result::Result<(), MigratableError> {
956         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
957         let mut socket = UnixStream::connect(path).map_err(|e| {
958             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
959         })?;
960 
961         // Start the migration
962         Request::start().write_to(&mut socket)?;
963         Response::read_from(&mut socket)?.ok_or_abandon(
964             &mut socket,
965             MigratableError::MigrateSend(anyhow!("Error starting migration")),
966         )?;
967 
968         // Send config
969         let vm_config = vm.get_config();
970         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
971         let common_cpuid = {
972             #[cfg(feature = "tdx")]
973             if vm_config.lock().unwrap().is_tdx_enabled() {
974                 return Err(MigratableError::MigrateSend(anyhow!(
975                     "Live Migration is not supported when TDX is enabled"
976                 )));
977             };
978 
979             let amx = vm_config.lock().unwrap().cpus.features.amx;
980             let phys_bits =
981                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
982             arch::generate_common_cpuid(
983                 &hypervisor,
984                 &arch::CpuidConfig {
985                     sgx_epc_sections: None,
986                     phys_bits,
987                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
988                     #[cfg(feature = "tdx")]
989                     tdx: false,
990                     amx,
991                 },
992             )
993             .map_err(|e| {
994                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
995             })?
996         };
997 
998         if send_data_migration.local {
999             vm.send_memory_fds(&mut socket)?;
1000         }
1001 
1002         let vm_migration_config = VmMigrationConfig {
1003             vm_config,
1004             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1005             common_cpuid,
1006             memory_manager_data: vm.memory_manager_data(),
1007         };
1008         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1009         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1010         socket
1011             .write_all(&config_data)
1012             .map_err(MigratableError::MigrateSocket)?;
1013         Response::read_from(&mut socket)?.ok_or_abandon(
1014             &mut socket,
1015             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1016         )?;
1017 
1018         // Let every Migratable object know about the migration being started.
1019         vm.start_migration()?;
1020 
1021         if send_data_migration.local {
1022             // Now pause VM
1023             vm.pause()?;
1024         } else {
1025             // Start logging dirty pages
1026             vm.start_dirty_log()?;
1027 
1028             // Send memory table
1029             let table = vm.memory_range_table()?;
1030             Request::memory(table.length())
1031                 .write_to(&mut socket)
1032                 .unwrap();
1033             table.write_to(&mut socket)?;
1034             // And then the memory itself
1035             vm.send_memory_regions(&table, &mut socket)?;
1036             Response::read_from(&mut socket)?.ok_or_abandon(
1037                 &mut socket,
1038                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1039             )?;
1040 
1041             // Try at most 5 passes of dirty memory sending
1042             const MAX_DIRTY_MIGRATIONS: usize = 5;
1043             for i in 0..MAX_DIRTY_MIGRATIONS {
1044                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1045                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1046                     break;
1047                 }
1048             }
1049 
1050             // Now pause VM
1051             vm.pause()?;
1052 
1053             // Send last batch of dirty pages
1054             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1055 
1056             // Stop logging dirty pages
1057             vm.stop_dirty_log()?;
1058         }
1059         // Capture snapshot and send it
1060         let vm_snapshot = vm.snapshot()?;
1061         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1062         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1063         socket
1064             .write_all(&snapshot_data)
1065             .map_err(MigratableError::MigrateSocket)?;
1066         Response::read_from(&mut socket)?.ok_or_abandon(
1067             &mut socket,
1068             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1069         )?;
1070         // Complete the migration
1071         Request::complete().write_to(&mut socket)?;
1072         Response::read_from(&mut socket)?.ok_or_abandon(
1073             &mut socket,
1074             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1075         )?;
1076 
1077         info!("Migration complete");
1078 
1079         // Let every Migratable object know about the migration being complete
1080         vm.complete_migration()
1081     }
1082 
1083     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1084     fn vm_check_cpuid_compatibility(
1085         &self,
1086         src_vm_config: &Arc<Mutex<VmConfig>>,
1087         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1088     ) -> result::Result<(), MigratableError> {
1089         #[cfg(feature = "tdx")]
1090         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1091             return Err(MigratableError::MigrateReceive(anyhow!(
1092                 "Live Migration is not supported when TDX is enabled"
1093             )));
1094         };
1095 
1096         // We check the `CPUID` compatibility of between the source vm and destination, which is
1097         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1098         let dest_cpuid = &{
1099             let vm_config = &src_vm_config.lock().unwrap();
1100 
1101             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1102             arch::generate_common_cpuid(
1103                 &self.hypervisor.clone(),
1104                 &arch::CpuidConfig {
1105                     sgx_epc_sections: None,
1106                     phys_bits,
1107                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1108                     #[cfg(feature = "tdx")]
1109                     tdx: false,
1110                     amx: vm_config.cpus.features.amx,
1111                 },
1112             )
1113             .map_err(|e| {
1114                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1115             })?
1116         };
1117         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1118             MigratableError::MigrateReceive(anyhow!(
1119                 "Error checking cpu feature compatibility': {:?}",
1120                 e
1121             ))
1122         })
1123     }
1124 
    /// Runs the VMM event loop until the VMM is asked to stop.
    ///
    /// Blocks on the VMM epoll fd and dispatches each ready event according to its
    /// `EpollDispatch` value: VM exit, VM reset, pending virtio device activation, API
    /// requests received through `api_receiver` and, with the `guest_debug` feature, GDB
    /// requests received through `gdb_receiver`.
    ///
    /// The loop ends on an exit event or when an API request handler returns `true`.
    /// Afterwards the signal handler (if any) is closed and all threads registered in
    /// `self.threads` are joined.
    ///
    /// Any error from epoll, an eventfd read, a channel, or an event handler is returned
    /// immediately, without running the shutdown steps after the loop.
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        // Maximum number of events fetched by a single epoll wait.
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            // A timeout of -1 blocks until at least one event is ready.
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        // Without a VM the event is left unread and nothing is activated.
                        if let Some(ref vm) = self.vm {
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            // A request returning `true` asks the control loop to stop.
                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the GDB receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            // Without a VM the request is answered with VmNotRunning.
                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
1234 }
1235 
1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
1237     vm_config.lock().unwrap().apply_landlock()?;
1238     Ok(())
1239 }
1240 
1241 impl RequestHandler for Vmm {
1242     fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> {
1243         // We only store the passed VM config.
1244         // The VM will be created when being asked to boot it.
1245         if self.vm_config.is_none() {
1246             self.vm_config = Some(config);
1247             self.console_info =
1248                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1249 
1250             if self
1251                 .vm_config
1252                 .as_ref()
1253                 .unwrap()
1254                 .lock()
1255                 .unwrap()
1256                 .landlock_enable
1257             {
1258                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1259                     .map_err(VmError::ApplyLandlock)?;
1260             }
1261             Ok(())
1262         } else {
1263             Err(VmError::VmAlreadyCreated)
1264         }
1265     }
1266 
1267     fn vm_boot(&mut self) -> result::Result<(), VmError> {
1268         tracer::start();
1269         info!("Booting VM");
1270         event!("vm", "booting");
1271         let r = {
1272             trace_scoped!("vm_boot");
1273             // If we don't have a config, we cannot boot a VM.
1274             if self.vm_config.is_none() {
1275                 return Err(VmError::VmMissingConfig);
1276             };
1277 
1278             // console_info is set to None in vm_shutdown. re-populate here if empty
1279             if self.console_info.is_none() {
1280                 self.console_info =
1281                     Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1282             }
1283 
1284             // Create a new VM if we don't have one yet.
1285             if self.vm.is_none() {
1286                 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1287                 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1288                 #[cfg(feature = "guest_debug")]
1289                 let vm_debug_evt = self
1290                     .vm_debug_evt
1291                     .try_clone()
1292                     .map_err(VmError::EventFdClone)?;
1293                 let activate_evt = self
1294                     .activate_evt
1295                     .try_clone()
1296                     .map_err(VmError::EventFdClone)?;
1297 
1298                 if let Some(ref vm_config) = self.vm_config {
1299                     let vm = Vm::new(
1300                         Arc::clone(vm_config),
1301                         exit_evt,
1302                         reset_evt,
1303                         #[cfg(feature = "guest_debug")]
1304                         vm_debug_evt,
1305                         &self.seccomp_action,
1306                         self.hypervisor.clone(),
1307                         activate_evt,
1308                         self.console_info.clone(),
1309                         None,
1310                         Arc::clone(&self.original_termios_opt),
1311                         None,
1312                         None,
1313                         None,
1314                     )?;
1315 
1316                     self.vm = Some(vm);
1317                 }
1318             }
1319 
1320             // Now we can boot the VM.
1321             if let Some(ref mut vm) = self.vm {
1322                 vm.boot()
1323             } else {
1324                 Err(VmError::VmNotCreated)
1325             }
1326         };
1327         tracer::end();
1328         if r.is_ok() {
1329             event!("vm", "booted");
1330         }
1331         r
1332     }
1333 
1334     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1335         if let Some(ref mut vm) = self.vm {
1336             vm.pause().map_err(VmError::Pause)
1337         } else {
1338             Err(VmError::VmNotRunning)
1339         }
1340     }
1341 
1342     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1343         if let Some(ref mut vm) = self.vm {
1344             vm.resume().map_err(VmError::Resume)
1345         } else {
1346             Err(VmError::VmNotRunning)
1347         }
1348     }
1349 
1350     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1351         if let Some(ref mut vm) = self.vm {
1352             // Drain console_info so that FDs are not reused
1353             let _ = self.console_info.take();
1354             vm.snapshot()
1355                 .map_err(VmError::Snapshot)
1356                 .and_then(|snapshot| {
1357                     vm.send(&snapshot, destination_url)
1358                         .map_err(VmError::SnapshotSend)
1359                 })
1360         } else {
1361             Err(VmError::VmNotRunning)
1362         }
1363     }
1364 
1365     fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
1366         if self.vm.is_some() || self.vm_config.is_some() {
1367             return Err(VmError::VmAlreadyCreated);
1368         }
1369 
1370         let source_url = restore_cfg.source_url.as_path().to_str();
1371         if source_url.is_none() {
1372             return Err(VmError::InvalidRestoreSourceUrl);
1373         }
1374         // Safe to unwrap as we checked it was Some(&str).
1375         let source_url = source_url.unwrap();
1376 
1377         let vm_config = Arc::new(Mutex::new(
1378             recv_vm_config(source_url).map_err(VmError::Restore)?,
1379         ));
1380         restore_cfg
1381             .validate(&vm_config.lock().unwrap().clone())
1382             .map_err(VmError::ConfigValidation)?;
1383 
1384         // Update VM's net configurations with new fds received for restore operation
1385         if let (Some(restored_nets), Some(vm_net_configs)) =
1386             (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
1387         {
1388             for net in restored_nets.iter() {
1389                 for net_config in vm_net_configs.iter_mut() {
1390                     // update only if the net dev is backed by FDs
1391                     if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
1392                         net_config.fds.clone_from(&net.fds);
1393                     }
1394                 }
1395             }
1396         }
1397 
1398         let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
1399         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1400         let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;
1401 
1402         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1403         self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
1404             .map_err(VmError::Restore)?;
1405 
1406         self.vm_config = Some(Arc::clone(&vm_config));
1407 
1408         // console_info is set to None in vm_snapshot. re-populate here if empty
1409         if self.console_info.is_none() {
1410             self.console_info =
1411                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1412         }
1413 
1414         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1415         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1416         #[cfg(feature = "guest_debug")]
1417         let debug_evt = self
1418             .vm_debug_evt
1419             .try_clone()
1420             .map_err(VmError::EventFdClone)?;
1421         let activate_evt = self
1422             .activate_evt
1423             .try_clone()
1424             .map_err(VmError::EventFdClone)?;
1425 
1426         let vm = Vm::new(
1427             vm_config,
1428             exit_evt,
1429             reset_evt,
1430             #[cfg(feature = "guest_debug")]
1431             debug_evt,
1432             &self.seccomp_action,
1433             self.hypervisor.clone(),
1434             activate_evt,
1435             self.console_info.clone(),
1436             None,
1437             Arc::clone(&self.original_termios_opt),
1438             Some(snapshot),
1439             Some(source_url),
1440             Some(restore_cfg.prefault),
1441         )?;
1442         self.vm = Some(vm);
1443 
1444         if self
1445             .vm_config
1446             .as_ref()
1447             .unwrap()
1448             .lock()
1449             .unwrap()
1450             .landlock_enable
1451         {
1452             apply_landlock(self.vm_config.as_ref().unwrap().clone())
1453                 .map_err(VmError::ApplyLandlock)?;
1454         }
1455 
1456         // Now we can restore the rest of the VM.
1457         if let Some(ref mut vm) = self.vm {
1458             vm.restore()
1459         } else {
1460             Err(VmError::VmNotCreated)
1461         }
1462     }
1463 
1464     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1465     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1466         if let Some(ref mut vm) = self.vm {
1467             vm.coredump(destination_url).map_err(VmError::Coredump)
1468         } else {
1469             Err(VmError::VmNotRunning)
1470         }
1471     }
1472 
1473     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1474         let r = if let Some(ref mut vm) = self.vm.take() {
1475             // Drain console_info so that the FDs are not reused
1476             let _ = self.console_info.take();
1477             vm.shutdown()
1478         } else {
1479             Err(VmError::VmNotRunning)
1480         };
1481 
1482         if r.is_ok() {
1483             event!("vm", "shutdown");
1484         }
1485 
1486         r
1487     }
1488 
1489     fn vm_reboot(&mut self) -> result::Result<(), VmError> {
1490         event!("vm", "rebooting");
1491 
1492         // First we stop the current VM
1493         let (config, console_resize_pipe) = if let Some(mut vm) = self.vm.take() {
1494             let config = vm.get_config();
1495             let console_resize_pipe = vm
1496                 .console_resize_pipe()
1497                 .as_ref()
1498                 .map(|pipe| pipe.try_clone().unwrap());
1499             vm.shutdown()?;
1500             (config, console_resize_pipe)
1501         } else {
1502             return Err(VmError::VmNotCreated);
1503         };
1504 
1505         // vm.shutdown() closes all the console devices, so set console_info to None
1506         // so that the closed FD #s are not reused.
1507         let _ = self.console_info.take();
1508 
1509         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1510         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1511         #[cfg(feature = "guest_debug")]
1512         let debug_evt = self
1513             .vm_debug_evt
1514             .try_clone()
1515             .map_err(VmError::EventFdClone)?;
1516         let activate_evt = self
1517             .activate_evt
1518             .try_clone()
1519             .map_err(VmError::EventFdClone)?;
1520 
1521         // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
1522         // an event sitting in the shared reset_evt. Without doing this we get very early reboots
1523         // during the boot process.
1524         if self.reset_evt.read().is_ok() {
1525             warn!("Spurious second reset event received. Ignoring.");
1526         }
1527 
1528         self.console_info =
1529             Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1530 
1531         // Then we create the new VM
1532         let mut vm = Vm::new(
1533             config,
1534             exit_evt,
1535             reset_evt,
1536             #[cfg(feature = "guest_debug")]
1537             debug_evt,
1538             &self.seccomp_action,
1539             self.hypervisor.clone(),
1540             activate_evt,
1541             self.console_info.clone(),
1542             console_resize_pipe,
1543             Arc::clone(&self.original_termios_opt),
1544             None,
1545             None,
1546             None,
1547         )?;
1548 
1549         // And we boot it
1550         vm.boot()?;
1551 
1552         self.vm = Some(vm);
1553 
1554         event!("vm", "rebooted");
1555 
1556         Ok(())
1557     }
1558 
1559     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1560         match &self.vm_config {
1561             Some(config) => {
1562                 let state = match &self.vm {
1563                     Some(vm) => vm.get_state()?,
1564                     None => VmState::Created,
1565                 };
1566 
1567                 let config = Arc::clone(config);
1568 
1569                 let mut memory_actual_size = config.lock().unwrap().memory.total_size();
1570                 if let Some(vm) = &self.vm {
1571                     memory_actual_size -= vm.balloon_size();
1572                 }
1573 
1574                 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree());
1575 
1576                 Ok(VmInfoResponse {
1577                     config,
1578                     state,
1579                     memory_actual_size,
1580                     device_tree,
1581                 })
1582             }
1583             None => Err(VmError::VmNotCreated),
1584         }
1585     }
1586 
1587     fn vmm_ping(&self) -> VmmPingResponse {
1588         let VmmVersionInfo {
1589             build_version,
1590             version,
1591         } = self.version.clone();
1592 
1593         VmmPingResponse {
1594             build_version,
1595             version,
1596             pid: std::process::id() as i64,
1597             features: feature_list(),
1598         }
1599     }
1600 
1601     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1602         if self.vm_config.is_none() {
1603             return Ok(());
1604         }
1605 
1606         // If a VM is booted, we first try to shut it down.
1607         if self.vm.is_some() {
1608             self.vm_shutdown()?;
1609         }
1610 
1611         self.vm_config = None;
1612 
1613         event!("vm", "deleted");
1614 
1615         Ok(())
1616     }
1617 
1618     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1619         self.vm_delete()?;
1620         event!("vmm", "shutdown");
1621         Ok(())
1622     }
1623 
1624     fn vm_resize(
1625         &mut self,
1626         desired_vcpus: Option<u8>,
1627         desired_ram: Option<u64>,
1628         desired_balloon: Option<u64>,
1629     ) -> result::Result<(), VmError> {
1630         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1631 
1632         if let Some(ref mut vm) = self.vm {
1633             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1634                 error!("Error when resizing VM: {:?}", e);
1635                 Err(e)
1636             } else {
1637                 Ok(())
1638             }
1639         } else {
1640             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1641             if let Some(desired_vcpus) = desired_vcpus {
1642                 config.cpus.boot_vcpus = desired_vcpus;
1643             }
1644             if let Some(desired_ram) = desired_ram {
1645                 config.memory.size = desired_ram;
1646             }
1647             if let Some(desired_balloon) = desired_balloon {
1648                 if let Some(balloon_config) = &mut config.balloon {
1649                     balloon_config.size = desired_balloon;
1650                 }
1651             }
1652             Ok(())
1653         }
1654     }
1655 
1656     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1657         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1658 
1659         if let Some(ref mut vm) = self.vm {
1660             if let Err(e) = vm.resize_zone(id, desired_ram) {
1661                 error!("Error when resizing VM: {:?}", e);
1662                 Err(e)
1663             } else {
1664                 Ok(())
1665             }
1666         } else {
1667             // Update VmConfig by setting the new desired ram.
1668             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1669 
1670             if let Some(zones) = &mut memory_config.zones {
1671                 for zone in zones.iter_mut() {
1672                     if zone.id == id {
1673                         zone.size = desired_ram;
1674                         return Ok(());
1675                     }
1676                 }
1677             }
1678 
1679             error!("Could not find the memory zone {} for the resize", id);
1680             Err(VmError::ResizeZone)
1681         }
1682     }
1683 
1684     fn vm_add_device(
1685         &mut self,
1686         device_cfg: DeviceConfig,
1687     ) -> result::Result<Option<Vec<u8>>, VmError> {
1688         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1689 
1690         {
1691             // Validate the configuration change in a cloned configuration
1692             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1693             add_to_config(&mut config.devices, device_cfg.clone());
1694             config.validate().map_err(VmError::ConfigValidation)?;
1695         }
1696 
1697         if let Some(ref mut vm) = self.vm {
1698             let info = vm.add_device(device_cfg).map_err(|e| {
1699                 error!("Error when adding new device to the VM: {:?}", e);
1700                 e
1701             })?;
1702             serde_json::to_vec(&info)
1703                 .map(Some)
1704                 .map_err(VmError::SerializeJson)
1705         } else {
1706             // Update VmConfig by adding the new device.
1707             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1708             add_to_config(&mut config.devices, device_cfg);
1709             Ok(None)
1710         }
1711     }
1712 
1713     fn vm_add_user_device(
1714         &mut self,
1715         device_cfg: UserDeviceConfig,
1716     ) -> result::Result<Option<Vec<u8>>, VmError> {
1717         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1718 
1719         {
1720             // Validate the configuration change in a cloned configuration
1721             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1722             add_to_config(&mut config.user_devices, device_cfg.clone());
1723             config.validate().map_err(VmError::ConfigValidation)?;
1724         }
1725 
1726         if let Some(ref mut vm) = self.vm {
1727             let info = vm.add_user_device(device_cfg).map_err(|e| {
1728                 error!("Error when adding new user device to the VM: {:?}", e);
1729                 e
1730             })?;
1731             serde_json::to_vec(&info)
1732                 .map(Some)
1733                 .map_err(VmError::SerializeJson)
1734         } else {
1735             // Update VmConfig by adding the new device.
1736             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1737             add_to_config(&mut config.user_devices, device_cfg);
1738             Ok(None)
1739         }
1740     }
1741 
1742     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1743         if let Some(ref mut vm) = self.vm {
1744             if let Err(e) = vm.remove_device(id) {
1745                 error!("Error when removing device from the VM: {:?}", e);
1746                 Err(e)
1747             } else {
1748                 Ok(())
1749             }
1750         } else if let Some(ref config) = self.vm_config {
1751             let mut config = config.lock().unwrap();
1752             if config.remove_device(&id) {
1753                 Ok(())
1754             } else {
1755                 Err(VmError::NoDeviceToRemove(id))
1756             }
1757         } else {
1758             Err(VmError::VmNotCreated)
1759         }
1760     }
1761 
1762     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1763         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1764 
1765         {
1766             // Validate the configuration change in a cloned configuration
1767             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1768             add_to_config(&mut config.disks, disk_cfg.clone());
1769             config.validate().map_err(VmError::ConfigValidation)?;
1770         }
1771 
1772         if let Some(ref mut vm) = self.vm {
1773             let info = vm.add_disk(disk_cfg).map_err(|e| {
1774                 error!("Error when adding new disk to the VM: {:?}", e);
1775                 e
1776             })?;
1777             serde_json::to_vec(&info)
1778                 .map(Some)
1779                 .map_err(VmError::SerializeJson)
1780         } else {
1781             // Update VmConfig by adding the new device.
1782             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1783             add_to_config(&mut config.disks, disk_cfg);
1784             Ok(None)
1785         }
1786     }
1787 
1788     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1789         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1790 
1791         {
1792             // Validate the configuration change in a cloned configuration
1793             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1794             add_to_config(&mut config.fs, fs_cfg.clone());
1795             config.validate().map_err(VmError::ConfigValidation)?;
1796         }
1797 
1798         if let Some(ref mut vm) = self.vm {
1799             let info = vm.add_fs(fs_cfg).map_err(|e| {
1800                 error!("Error when adding new fs to the VM: {:?}", e);
1801                 e
1802             })?;
1803             serde_json::to_vec(&info)
1804                 .map(Some)
1805                 .map_err(VmError::SerializeJson)
1806         } else {
1807             // Update VmConfig by adding the new device.
1808             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1809             add_to_config(&mut config.fs, fs_cfg);
1810             Ok(None)
1811         }
1812     }
1813 
1814     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1815         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1816 
1817         {
1818             // Validate the configuration change in a cloned configuration
1819             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1820             add_to_config(&mut config.pmem, pmem_cfg.clone());
1821             config.validate().map_err(VmError::ConfigValidation)?;
1822         }
1823 
1824         if let Some(ref mut vm) = self.vm {
1825             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1826                 error!("Error when adding new pmem device to the VM: {:?}", e);
1827                 e
1828             })?;
1829             serde_json::to_vec(&info)
1830                 .map(Some)
1831                 .map_err(VmError::SerializeJson)
1832         } else {
1833             // Update VmConfig by adding the new device.
1834             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1835             add_to_config(&mut config.pmem, pmem_cfg);
1836             Ok(None)
1837         }
1838     }
1839 
1840     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1841         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1842 
1843         {
1844             // Validate the configuration change in a cloned configuration
1845             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1846             add_to_config(&mut config.net, net_cfg.clone());
1847             config.validate().map_err(VmError::ConfigValidation)?;
1848         }
1849 
1850         if let Some(ref mut vm) = self.vm {
1851             let info = vm.add_net(net_cfg).map_err(|e| {
1852                 error!("Error when adding new network device to the VM: {:?}", e);
1853                 e
1854             })?;
1855             serde_json::to_vec(&info)
1856                 .map(Some)
1857                 .map_err(VmError::SerializeJson)
1858         } else {
1859             // Update VmConfig by adding the new device.
1860             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1861             add_to_config(&mut config.net, net_cfg);
1862             Ok(None)
1863         }
1864     }
1865 
1866     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1867         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1868 
1869         {
1870             // Validate the configuration change in a cloned configuration
1871             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1872             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1873             config.validate().map_err(VmError::ConfigValidation)?;
1874         }
1875 
1876         if let Some(ref mut vm) = self.vm {
1877             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1878                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1879                 e
1880             })?;
1881             serde_json::to_vec(&info)
1882                 .map(Some)
1883                 .map_err(VmError::SerializeJson)
1884         } else {
1885             // Update VmConfig by adding the new device.
1886             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1887             add_to_config(&mut config.vdpa, vdpa_cfg);
1888             Ok(None)
1889         }
1890     }
1891 
1892     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1893         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1894 
1895         {
1896             // Validate the configuration change in a cloned configuration
1897             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1898 
1899             if config.vsock.is_some() {
1900                 return Err(VmError::TooManyVsockDevices);
1901             }
1902 
1903             config.vsock = Some(vsock_cfg.clone());
1904             config.validate().map_err(VmError::ConfigValidation)?;
1905         }
1906 
1907         if let Some(ref mut vm) = self.vm {
1908             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1909                 error!("Error when adding new vsock device to the VM: {:?}", e);
1910                 e
1911             })?;
1912             serde_json::to_vec(&info)
1913                 .map(Some)
1914                 .map_err(VmError::SerializeJson)
1915         } else {
1916             // Update VmConfig by adding the new device.
1917             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1918             config.vsock = Some(vsock_cfg);
1919             Ok(None)
1920         }
1921     }
1922 
1923     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1924         if let Some(ref mut vm) = self.vm {
1925             let info = vm.counters().map_err(|e| {
1926                 error!("Error when getting counters from the VM: {:?}", e);
1927                 e
1928             })?;
1929             serde_json::to_vec(&info)
1930                 .map(Some)
1931                 .map_err(VmError::SerializeJson)
1932         } else {
1933             Err(VmError::VmNotRunning)
1934         }
1935     }
1936 
1937     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1938         if let Some(ref mut vm) = self.vm {
1939             vm.power_button()
1940         } else {
1941             Err(VmError::VmNotRunning)
1942         }
1943     }
1944 
1945     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1946         if let Some(ref mut vm) = self.vm {
1947             vm.nmi()
1948         } else {
1949             Err(VmError::VmNotRunning)
1950         }
1951     }
1952 
1953     fn vm_receive_migration(
1954         &mut self,
1955         receive_data_migration: VmReceiveMigrationData,
1956     ) -> result::Result<(), MigratableError> {
1957         info!(
1958             "Receiving migration: receiver_url = {}",
1959             receive_data_migration.receiver_url
1960         );
1961 
1962         let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
1963         let listener = UnixListener::bind(&path).map_err(|e| {
1964             MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
1965         })?;
1966         let (mut socket, _addr) = listener.accept().map_err(|e| {
1967             MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
1968         })?;
1969         std::fs::remove_file(&path).map_err(|e| {
1970             MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
1971         })?;
1972 
1973         let mut started = false;
1974         let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
1975         let mut existing_memory_files = None;
1976         loop {
1977             let req = Request::read_from(&mut socket)?;
1978             match req.command() {
1979                 Command::Invalid => info!("Invalid Command Received"),
1980                 Command::Start => {
1981                     info!("Start Command Received");
1982                     started = true;
1983 
1984                     Response::ok().write_to(&mut socket)?;
1985                 }
1986                 Command::Config => {
1987                     info!("Config Command Received");
1988 
1989                     if !started {
1990                         warn!("Migration not started yet");
1991                         Response::error().write_to(&mut socket)?;
1992                         continue;
1993                     }
1994                     memory_manager = Some(self.vm_receive_config(
1995                         &req,
1996                         &mut socket,
1997                         existing_memory_files.take(),
1998                     )?);
1999                 }
2000                 Command::State => {
2001                     info!("State Command Received");
2002 
2003                     if !started {
2004                         warn!("Migration not started yet");
2005                         Response::error().write_to(&mut socket)?;
2006                         continue;
2007                     }
2008                     if let Some(mm) = memory_manager.take() {
2009                         self.vm_receive_state(&req, &mut socket, mm)?;
2010                     } else {
2011                         warn!("Configuration not sent yet");
2012                         Response::error().write_to(&mut socket)?;
2013                     }
2014                 }
2015                 Command::Memory => {
2016                     info!("Memory Command Received");
2017 
2018                     if !started {
2019                         warn!("Migration not started yet");
2020                         Response::error().write_to(&mut socket)?;
2021                         continue;
2022                     }
2023                     if let Some(mm) = memory_manager.as_ref() {
2024                         self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
2025                     } else {
2026                         warn!("Configuration not sent yet");
2027                         Response::error().write_to(&mut socket)?;
2028                     }
2029                 }
2030                 Command::MemoryFd => {
2031                     info!("MemoryFd Command Received");
2032 
2033                     if !started {
2034                         warn!("Migration not started yet");
2035                         Response::error().write_to(&mut socket)?;
2036                         continue;
2037                     }
2038 
2039                     let mut buf = [0u8; 4];
2040                     let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
2041                         MigratableError::MigrateReceive(anyhow!(
2042                             "Error receiving slot from socket: {}",
2043                             e
2044                         ))
2045                     })?;
2046 
2047                     if existing_memory_files.is_none() {
2048                         existing_memory_files = Some(HashMap::default())
2049                     }
2050 
2051                     if let Some(ref mut existing_memory_files) = existing_memory_files {
2052                         let slot = u32::from_le_bytes(buf);
2053                         existing_memory_files.insert(slot, file.unwrap());
2054                     }
2055 
2056                     Response::ok().write_to(&mut socket)?;
2057                 }
2058                 Command::Complete => {
2059                     info!("Complete Command Received");
2060                     if let Some(ref mut vm) = self.vm.as_mut() {
2061                         vm.resume()?;
2062                         Response::ok().write_to(&mut socket)?;
2063                     } else {
2064                         warn!("VM not created yet");
2065                         Response::error().write_to(&mut socket)?;
2066                     }
2067                     break;
2068                 }
2069                 Command::Abandon => {
2070                     info!("Abandon Command Received");
2071                     self.vm = None;
2072                     self.vm_config = None;
2073                     Response::ok().write_to(&mut socket).ok();
2074                     break;
2075                 }
2076             }
2077         }
2078 
2079         Ok(())
2080     }
2081 
2082     fn vm_send_migration(
2083         &mut self,
2084         send_data_migration: VmSendMigrationData,
2085     ) -> result::Result<(), MigratableError> {
2086         info!(
2087             "Sending migration: destination_url = {}, local = {}",
2088             send_data_migration.destination_url, send_data_migration.local
2089         );
2090 
2091         if !self
2092             .vm_config
2093             .as_ref()
2094             .unwrap()
2095             .lock()
2096             .unwrap()
2097             .backed_by_shared_memory()
2098             && send_data_migration.local
2099         {
2100             return Err(MigratableError::MigrateSend(anyhow!(
2101                 "Local migration requires shared memory or hugepages enabled"
2102             )));
2103         }
2104 
2105         if let Some(vm) = self.vm.as_mut() {
2106             Self::send_migration(
2107                 vm,
2108                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2109                 self.hypervisor.clone(),
2110                 send_data_migration,
2111             )
2112             .map_err(|migration_err| {
2113                 error!("Migration failed: {:?}", migration_err);
2114 
2115                 // Stop logging dirty pages
2116                 if let Err(e) = vm.stop_dirty_log() {
2117                     return e;
2118                 }
2119 
2120                 if vm.get_state().unwrap() == VmState::Paused {
2121                     if let Err(e) = vm.resume() {
2122                         return e;
2123                     }
2124                 }
2125 
2126                 migration_err
2127             })?;
2128 
2129             // Shutdown the VM after the migration succeeded
2130             self.exit_evt.write(1).map_err(|e| {
2131                 MigratableError::MigrateSend(anyhow!(
2132                     "Failed shutting down the VM after migration: {:?}",
2133                     e
2134                 ))
2135             })
2136         } else {
2137             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2138         }
2139     }
2140 }
2141 
// String identifiers for the CPU manager, memory manager and device manager
// snapshots.
// NOTE(review): their use sites are outside this part of the file; presumably
// they key the per-component sections of a VM snapshot — confirm.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2145 
2146 #[cfg(test)]
2147 mod unit_tests {
2148     use super::*;
2149     #[cfg(target_arch = "x86_64")]
2150     use crate::config::DebugConsoleConfig;
2151     use config::{
2152         ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig,
2153         RngConfig,
2154     };
2155 
2156     fn create_dummy_vmm() -> Vmm {
2157         Vmm::new(
2158             VmmVersionInfo::new("dummy", "dummy"),
2159             EventFd::new(EFD_NONBLOCK).unwrap(),
2160             #[cfg(feature = "guest_debug")]
2161             EventFd::new(EFD_NONBLOCK).unwrap(),
2162             #[cfg(feature = "guest_debug")]
2163             EventFd::new(EFD_NONBLOCK).unwrap(),
2164             SeccompAction::Allow,
2165             hypervisor::new().unwrap(),
2166             EventFd::new(EFD_NONBLOCK).unwrap(),
2167         )
2168         .unwrap()
2169     }
2170 
2171     fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> {
2172         Arc::new(Mutex::new(VmConfig {
2173             cpus: CpusConfig {
2174                 boot_vcpus: 1,
2175                 max_vcpus: 1,
2176                 topology: None,
2177                 kvm_hyperv: false,
2178                 max_phys_bits: 46,
2179                 affinity: None,
2180                 features: config::CpuFeatures::default(),
2181             },
2182             memory: MemoryConfig {
2183                 size: 536_870_912,
2184                 mergeable: false,
2185                 hotplug_method: HotplugMethod::Acpi,
2186                 hotplug_size: None,
2187                 hotplugged_size: None,
2188                 shared: true,
2189                 hugepages: false,
2190                 hugepage_size: None,
2191                 prefault: false,
2192                 zones: None,
2193                 thp: true,
2194             },
2195             payload: Some(PayloadConfig {
2196                 kernel: Some(PathBuf::from("/path/to/kernel")),
2197                 firmware: None,
2198                 cmdline: None,
2199                 initramfs: None,
2200                 #[cfg(feature = "igvm")]
2201                 igvm: None,
2202                 #[cfg(feature = "sev_snp")]
2203                 host_data: None,
2204             }),
2205             rate_limit_groups: None,
2206             disks: None,
2207             net: None,
2208             rng: RngConfig {
2209                 src: PathBuf::from("/dev/urandom"),
2210                 iommu: false,
2211             },
2212             balloon: None,
2213             fs: None,
2214             pmem: None,
2215             serial: ConsoleConfig {
2216                 file: None,
2217                 mode: ConsoleOutputMode::Null,
2218                 iommu: false,
2219                 socket: None,
2220             },
2221             console: ConsoleConfig {
2222                 file: None,
2223                 mode: ConsoleOutputMode::Tty,
2224                 iommu: false,
2225                 socket: None,
2226             },
2227             #[cfg(target_arch = "x86_64")]
2228             debug_console: DebugConsoleConfig::default(),
2229             devices: None,
2230             user_devices: None,
2231             vdpa: None,
2232             vsock: None,
2233             #[cfg(feature = "pvmemcontrol")]
2234             pvmemcontrol: None,
2235             pvpanic: false,
2236             iommu: false,
2237             #[cfg(target_arch = "x86_64")]
2238             sgx_epc: None,
2239             numa: None,
2240             watchdog: false,
2241             #[cfg(feature = "guest_debug")]
2242             gdb: false,
2243             pci_segments: None,
2244             platform: None,
2245             tpm: None,
2246             preserved_fds: None,
2247             landlock_enable: false,
2248             landlock_rules: None,
2249         }))
2250     }
2251 
2252     #[test]
2253     fn test_vmm_vm_create() {
2254         let mut vmm = create_dummy_vmm();
2255         let config = create_dummy_vm_config();
2256 
2257         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2258         assert!(matches!(
2259             vmm.vm_create(config),
2260             Err(VmError::VmAlreadyCreated)
2261         ));
2262     }
2263 
2264     #[test]
2265     fn test_vmm_vm_cold_add_device() {
2266         let mut vmm = create_dummy_vmm();
2267         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2268 
2269         assert!(matches!(
2270             vmm.vm_add_device(device_config.clone()),
2271             Err(VmError::VmNotCreated)
2272         ));
2273 
2274         let _ = vmm.vm_create(create_dummy_vm_config());
2275         assert!(vmm
2276             .vm_config
2277             .as_ref()
2278             .unwrap()
2279             .lock()
2280             .unwrap()
2281             .devices
2282             .is_none());
2283 
2284         let result = vmm.vm_add_device(device_config.clone());
2285         assert!(result.is_ok());
2286         assert!(result.unwrap().is_none());
2287         assert_eq!(
2288             vmm.vm_config
2289                 .as_ref()
2290                 .unwrap()
2291                 .lock()
2292                 .unwrap()
2293                 .devices
2294                 .clone()
2295                 .unwrap()
2296                 .len(),
2297             1
2298         );
2299         assert_eq!(
2300             vmm.vm_config
2301                 .as_ref()
2302                 .unwrap()
2303                 .lock()
2304                 .unwrap()
2305                 .devices
2306                 .clone()
2307                 .unwrap()[0],
2308             device_config
2309         );
2310     }
2311 
2312     #[test]
2313     fn test_vmm_vm_cold_add_user_device() {
2314         let mut vmm = create_dummy_vmm();
2315         let user_device_config =
2316             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2317 
2318         assert!(matches!(
2319             vmm.vm_add_user_device(user_device_config.clone()),
2320             Err(VmError::VmNotCreated)
2321         ));
2322 
2323         let _ = vmm.vm_create(create_dummy_vm_config());
2324         assert!(vmm
2325             .vm_config
2326             .as_ref()
2327             .unwrap()
2328             .lock()
2329             .unwrap()
2330             .user_devices
2331             .is_none());
2332 
2333         let result = vmm.vm_add_user_device(user_device_config.clone());
2334         assert!(result.is_ok());
2335         assert!(result.unwrap().is_none());
2336         assert_eq!(
2337             vmm.vm_config
2338                 .as_ref()
2339                 .unwrap()
2340                 .lock()
2341                 .unwrap()
2342                 .user_devices
2343                 .clone()
2344                 .unwrap()
2345                 .len(),
2346             1
2347         );
2348         assert_eq!(
2349             vmm.vm_config
2350                 .as_ref()
2351                 .unwrap()
2352                 .lock()
2353                 .unwrap()
2354                 .user_devices
2355                 .clone()
2356                 .unwrap()[0],
2357             user_device_config
2358         );
2359     }
2360 
2361     #[test]
2362     fn test_vmm_vm_cold_add_disk() {
2363         let mut vmm = create_dummy_vmm();
2364         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2365 
2366         assert!(matches!(
2367             vmm.vm_add_disk(disk_config.clone()),
2368             Err(VmError::VmNotCreated)
2369         ));
2370 
2371         let _ = vmm.vm_create(create_dummy_vm_config());
2372         assert!(vmm
2373             .vm_config
2374             .as_ref()
2375             .unwrap()
2376             .lock()
2377             .unwrap()
2378             .disks
2379             .is_none());
2380 
2381         let result = vmm.vm_add_disk(disk_config.clone());
2382         assert!(result.is_ok());
2383         assert!(result.unwrap().is_none());
2384         assert_eq!(
2385             vmm.vm_config
2386                 .as_ref()
2387                 .unwrap()
2388                 .lock()
2389                 .unwrap()
2390                 .disks
2391                 .clone()
2392                 .unwrap()
2393                 .len(),
2394             1
2395         );
2396         assert_eq!(
2397             vmm.vm_config
2398                 .as_ref()
2399                 .unwrap()
2400                 .lock()
2401                 .unwrap()
2402                 .disks
2403                 .clone()
2404                 .unwrap()[0],
2405             disk_config
2406         );
2407     }
2408 
2409     #[test]
2410     fn test_vmm_vm_cold_add_fs() {
2411         let mut vmm = create_dummy_vmm();
2412         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2413 
2414         assert!(matches!(
2415             vmm.vm_add_fs(fs_config.clone()),
2416             Err(VmError::VmNotCreated)
2417         ));
2418 
2419         let _ = vmm.vm_create(create_dummy_vm_config());
2420         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2421 
2422         let result = vmm.vm_add_fs(fs_config.clone());
2423         assert!(result.is_ok());
2424         assert!(result.unwrap().is_none());
2425         assert_eq!(
2426             vmm.vm_config
2427                 .as_ref()
2428                 .unwrap()
2429                 .lock()
2430                 .unwrap()
2431                 .fs
2432                 .clone()
2433                 .unwrap()
2434                 .len(),
2435             1
2436         );
2437         assert_eq!(
2438             vmm.vm_config
2439                 .as_ref()
2440                 .unwrap()
2441                 .lock()
2442                 .unwrap()
2443                 .fs
2444                 .clone()
2445                 .unwrap()[0],
2446             fs_config
2447         );
2448     }
2449 
2450     #[test]
2451     fn test_vmm_vm_cold_add_pmem() {
2452         let mut vmm = create_dummy_vmm();
2453         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2454 
2455         assert!(matches!(
2456             vmm.vm_add_pmem(pmem_config.clone()),
2457             Err(VmError::VmNotCreated)
2458         ));
2459 
2460         let _ = vmm.vm_create(create_dummy_vm_config());
2461         assert!(vmm
2462             .vm_config
2463             .as_ref()
2464             .unwrap()
2465             .lock()
2466             .unwrap()
2467             .pmem
2468             .is_none());
2469 
2470         let result = vmm.vm_add_pmem(pmem_config.clone());
2471         assert!(result.is_ok());
2472         assert!(result.unwrap().is_none());
2473         assert_eq!(
2474             vmm.vm_config
2475                 .as_ref()
2476                 .unwrap()
2477                 .lock()
2478                 .unwrap()
2479                 .pmem
2480                 .clone()
2481                 .unwrap()
2482                 .len(),
2483             1
2484         );
2485         assert_eq!(
2486             vmm.vm_config
2487                 .as_ref()
2488                 .unwrap()
2489                 .lock()
2490                 .unwrap()
2491                 .pmem
2492                 .clone()
2493                 .unwrap()[0],
2494             pmem_config
2495         );
2496     }
2497 
// Cold-adding a network device: the request is expected to fail with
// `VmNotCreated` before `vm_create`, and to be stored in the VM
// configuration once a VM has been created.
#[test]
fn test_vmm_vm_cold_add_net() {
    let mut vmm = create_dummy_vmm();
    let net_params =
        "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock";
    let net_config = NetConfig::parse(net_params).unwrap();

    // Before any VM is created the request must be refused.
    let early = vmm.vm_add_net(net_config.clone());
    assert!(matches!(early, Err(VmError::VmNotCreated)));

    let _ = vmm.vm_create(create_dummy_vm_config());
    {
        // Keep the guard in its own scope so the lock is released before
        // the next call into the VMM.
        let config = vmm.vm_config.as_ref().unwrap().lock().unwrap();
        assert!(config.net.is_none());
    }

    // The cold add is expected to succeed and return `Ok(None)`.
    let added = vmm.vm_add_net(net_config.clone());
    assert!(added.is_ok());
    assert!(added.unwrap().is_none());

    // Exactly one net entry must now be stored, equal to the one requested.
    let stored = vmm
        .vm_config
        .as_ref()
        .unwrap()
        .lock()
        .unwrap()
        .net
        .clone()
        .unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0], net_config);
}
2548 
// Cold-adding a vDPA device: the request is expected to fail with
// `VmNotCreated` before `vm_create`, and to be stored in the VM
// configuration once a VM has been created.
#[test]
fn test_vmm_vm_cold_add_vdpa() {
    let mut vmm = create_dummy_vmm();
    let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();

    // Before any VM is created the request must be refused.
    let early = vmm.vm_add_vdpa(vdpa_config.clone());
    assert!(matches!(early, Err(VmError::VmNotCreated)));

    let _ = vmm.vm_create(create_dummy_vm_config());
    {
        // Keep the guard in its own scope so the lock is released before
        // the next call into the VMM.
        let config = vmm.vm_config.as_ref().unwrap().lock().unwrap();
        assert!(config.vdpa.is_none());
    }

    // The cold add is expected to succeed and return `Ok(None)`.
    let added = vmm.vm_add_vdpa(vdpa_config.clone());
    assert!(added.is_ok());
    assert!(added.unwrap().is_none());

    // Exactly one vdpa entry must now be stored, equal to the one requested.
    let stored = vmm
        .vm_config
        .as_ref()
        .unwrap()
        .lock()
        .unwrap()
        .vdpa
        .clone()
        .unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0], vdpa_config);
}
2596 
// Cold-adding a vsock device: the request is expected to fail with
// `VmNotCreated` before `vm_create`, and to be stored in the VM
// configuration once a VM has been created. Unlike the list-valued device
// fields, `vsock` is compared here as a single optional value.
#[test]
fn test_vmm_vm_cold_add_vsock() {
    let mut vmm = create_dummy_vmm();
    let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();

    // Before any VM is created the request must be refused.
    let early = vmm.vm_add_vsock(vsock_config.clone());
    assert!(matches!(early, Err(VmError::VmNotCreated)));

    let _ = vmm.vm_create(create_dummy_vm_config());
    {
        // Keep the guard in its own scope so the lock is released before
        // the next call into the VMM.
        let config = vmm.vm_config.as_ref().unwrap().lock().unwrap();
        assert!(config.vsock.is_none());
    }

    // The cold add is expected to succeed and return `Ok(None)`.
    let added = vmm.vm_add_vsock(vsock_config.clone());
    assert!(added.is_ok());
    assert!(added.unwrap().is_none());

    // The stored vsock configuration must equal the one requested.
    let stored = vmm
        .vm_config
        .as_ref()
        .unwrap()
        .lock()
        .unwrap()
        .vsock
        .clone()
        .unwrap();
    assert_eq!(stored, vsock_config);
}
2632 }
2633