xref: /cloud-hypervisor/vmm/src/lib.rs (revision 5a70d7ec69836ad66cdd1e4ea59414dcdaaeec8c)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 
6 #[macro_use]
7 extern crate event_monitor;
8 #[macro_use]
9 extern crate log;
10 
11 use crate::api::{
12     ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
13     VmSendMigrationData, VmmPingResponse,
14 };
15 use crate::config::{
16     add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig,
17     UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
18 };
19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
20 use crate::coredump::GuestDebuggable;
21 use crate::landlock::Landlock;
22 use crate::memory_manager::MemoryManager;
23 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
24 use crate::migration::get_vm_snapshot;
25 use crate::migration::{recv_vm_config, recv_vm_state};
26 use crate::seccomp_filters::{get_seccomp_filter, Thread};
27 use crate::vm::{Error as VmError, Vm, VmState};
28 use anyhow::anyhow;
29 #[cfg(feature = "dbus_api")]
30 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
31 use api::http::HttpApiHandle;
32 use console_devices::{pre_create_console_devices, ConsoleInfo};
33 use landlock::LandlockError;
34 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
35 use memory_manager::MemoryManagerSnapshotData;
36 use pci::PciBdf;
37 use seccompiler::{apply_filter, SeccompAction};
38 use serde::ser::{SerializeStruct, Serializer};
39 use serde::{Deserialize, Serialize};
40 use signal_hook::iterator::{Handle, Signals};
41 use std::collections::HashMap;
42 use std::fs::File;
43 use std::io;
44 use std::io::{stdout, Read, Write};
45 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
46 use std::os::unix::net::UnixListener;
47 use std::os::unix::net::UnixStream;
48 use std::panic::AssertUnwindSafe;
49 use std::path::PathBuf;
50 use std::rc::Rc;
51 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
52 use std::sync::{Arc, Mutex};
53 use std::time::Instant;
54 use std::{result, thread};
55 use thiserror::Error;
56 use tracer::trace_scoped;
57 use vm_memory::bitmap::AtomicBitmap;
58 use vm_memory::{ReadVolatile, WriteVolatile};
59 use vm_migration::{protocol::*, Migratable};
60 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
61 use vmm_sys_util::eventfd::EventFd;
62 use vmm_sys_util::signal::unblock_signal;
63 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
64 
65 mod acpi;
66 pub mod api;
67 mod clone3;
68 pub mod config;
69 pub mod console_devices;
70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
71 mod coredump;
72 pub mod cpu;
73 pub mod device_manager;
74 pub mod device_tree;
75 #[cfg(feature = "guest_debug")]
76 mod gdb;
77 #[cfg(feature = "igvm")]
78 mod igvm;
79 pub mod interrupt;
80 pub mod landlock;
81 pub mod memory_manager;
82 pub mod migration;
83 mod pci_segment;
84 pub mod seccomp_filters;
85 mod serial_manager;
86 mod sigwinch_listener;
87 pub mod vm;
88 pub mod vm_config;
89 
// Crate-local shorthands for the `vm_memory` guest memory types, with
// `AtomicBitmap` supplied as their type parameter.
type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;
92 
93 /// Errors associated with VMM management
94 #[derive(Debug, Error)]
95 pub enum Error {
96     /// API request receive error
97     #[error("Error receiving API request: {0}")]
98     ApiRequestRecv(#[source] RecvError),
99 
100     /// API response send error
101     #[error("Error sending API request: {0}")]
102     ApiResponseSend(#[source] SendError<ApiResponse>),
103 
104     /// Cannot bind to the UNIX domain socket path
105     #[error("Error binding to UNIX domain socket: {0}")]
106     Bind(#[source] io::Error),
107 
108     /// Cannot clone EventFd.
109     #[error("Error cloning EventFd: {0}")]
110     EventFdClone(#[source] io::Error),
111 
112     /// Cannot create EventFd.
113     #[error("Error creating EventFd: {0}")]
114     EventFdCreate(#[source] io::Error),
115 
116     /// Cannot read from EventFd.
117     #[error("Error reading from EventFd: {0}")]
118     EventFdRead(#[source] io::Error),
119 
120     /// Cannot create epoll context.
121     #[error("Error creating epoll context: {0}")]
122     Epoll(#[source] io::Error),
123 
124     /// Cannot create HTTP thread
125     #[error("Error spawning HTTP thread: {0}")]
126     HttpThreadSpawn(#[source] io::Error),
127 
128     /// Cannot create D-Bus thread
129     #[cfg(feature = "dbus_api")]
130     #[error("Error spawning D-Bus thread: {0}")]
131     DBusThreadSpawn(#[source] io::Error),
132 
133     /// Cannot start D-Bus session
134     #[cfg(feature = "dbus_api")]
135     #[error("Error starting D-Bus session: {0}")]
136     CreateDBusSession(#[source] zbus::Error),
137 
138     /// Cannot create `event-monitor` thread
139     #[error("Error spawning `event-monitor` thread: {0}")]
140     EventMonitorThreadSpawn(#[source] io::Error),
141 
142     /// Cannot handle the VM STDIN stream
143     #[error("Error handling VM stdin: {0:?}")]
144     Stdin(VmError),
145 
146     /// Cannot handle the VM pty stream
147     #[error("Error handling VM pty: {0:?}")]
148     Pty(VmError),
149 
150     /// Cannot reboot the VM
151     #[error("Error rebooting VM: {0:?}")]
152     VmReboot(VmError),
153 
154     /// Cannot create VMM thread
155     #[error("Error spawning VMM thread {0:?}")]
156     VmmThreadSpawn(#[source] io::Error),
157 
158     /// Cannot shut the VMM down
159     #[error("Error shutting down VMM: {0:?}")]
160     VmmShutdown(VmError),
161 
162     /// Cannot create seccomp filter
163     #[error("Error creating seccomp filter: {0}")]
164     CreateSeccompFilter(seccompiler::Error),
165 
166     /// Cannot apply seccomp filter
167     #[error("Error applying seccomp filter: {0}")]
168     ApplySeccompFilter(seccompiler::Error),
169 
170     /// Error activating virtio devices
171     #[error("Error activating virtio devices: {0:?}")]
172     ActivateVirtioDevices(VmError),
173 
174     /// Error creating API server
175     #[error("Error creating API server {0:?}")]
176     CreateApiServer(micro_http::ServerError),
177 
178     /// Error binding API server socket
179     #[error("Error creation API server's socket {0:?}")]
180     CreateApiServerSocket(#[source] io::Error),
181 
182     #[cfg(feature = "guest_debug")]
183     #[error("Failed to start the GDB thread: {0}")]
184     GdbThreadSpawn(io::Error),
185 
186     /// GDB request receive error
187     #[cfg(feature = "guest_debug")]
188     #[error("Error receiving GDB request: {0}")]
189     GdbRequestRecv(#[source] RecvError),
190 
191     /// GDB response send error
192     #[cfg(feature = "guest_debug")]
193     #[error("Error sending GDB request: {0}")]
194     GdbResponseSend(#[source] SendError<gdb::GdbResponse>),
195 
196     #[error("Cannot spawn a signal handler thread: {0}")]
197     SignalHandlerSpawn(#[source] io::Error),
198 
199     #[error("Failed to join on threads: {0:?}")]
200     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
201 
202     /// Cannot create Landlock object
203     #[error("Error creating landlock object: {0}")]
204     CreateLandlock(LandlockError),
205 
206     /// Cannot apply landlock based sandboxing
207     #[error("Error applying landlock: {0}")]
208     ApplyLandlock(LandlockError),
209 }
210 pub type Result<T> = result::Result<T, Error>;
211 
/// Token identifying which event source fired on the VMM epoll instance.
///
/// The numeric value of a variant is what `EpollContext::add_event` stores as
/// the epoll event data; `From<u64>` performs the reverse mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    Unknown,
}

impl From<u64> for EpollDispatch {
    /// Maps a raw token back to its variant; any value that is not one of the
    /// explicitly numbered variants yields `Unknown`.
    fn from(v: u64) -> Self {
        // Every variant that has a dedicated token. `Unknown` is deliberately
        // absent so that its own discriminant also falls through to the default.
        const KNOWN: [EpollDispatch; 5] = [
            EpollDispatch::Exit,
            EpollDispatch::Reset,
            EpollDispatch::Api,
            EpollDispatch::ActivateVirtioDevices,
            EpollDispatch::Debug,
        ];

        KNOWN
            .iter()
            .copied()
            .find(|candidate| *candidate as u64 == v)
            .unwrap_or(EpollDispatch::Unknown)
    }
}
236 
237 pub struct EpollContext {
238     epoll_file: File,
239 }
240 
241 impl EpollContext {
242     pub fn new() -> result::Result<EpollContext, io::Error> {
243         let epoll_fd = epoll::create(true)?;
244         // Use 'File' to enforce closing on 'epoll_fd'
245         // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
246         let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };
247 
248         Ok(EpollContext { epoll_file })
249     }
250 
251     pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
252     where
253         T: AsRawFd,
254     {
255         let dispatch_index = token as u64;
256         epoll::ctl(
257             self.epoll_file.as_raw_fd(),
258             epoll::ControlOptions::EPOLL_CTL_ADD,
259             fd.as_raw_fd(),
260             epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
261         )?;
262 
263         Ok(())
264     }
265 
266     #[cfg(fuzzing)]
267     pub fn add_event_custom<T>(
268         &mut self,
269         fd: &T,
270         id: u64,
271         evts: epoll::Events,
272     ) -> result::Result<(), io::Error>
273     where
274         T: AsRawFd,
275     {
276         epoll::ctl(
277             self.epoll_file.as_raw_fd(),
278             epoll::ControlOptions::EPOLL_CTL_ADD,
279             fd.as_raw_fd(),
280             epoll::Event::new(evts, id),
281         )?;
282 
283         Ok(())
284     }
285 }
286 
287 impl AsRawFd for EpollContext {
288     fn as_raw_fd(&self) -> RawFd {
289         self.epoll_file.as_raw_fd()
290     }
291 }
292 
293 pub struct PciDeviceInfo {
294     pub id: String,
295     pub bdf: PciBdf,
296 }
297 
298 impl Serialize for PciDeviceInfo {
299     fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
300     where
301         S: Serializer,
302     {
303         let bdf_str = self.bdf.to_string();
304 
305         // Serialize the structure.
306         let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
307         state.serialize_field("id", &self.id)?;
308         state.serialize_field("bdf", &bdf_str)?;
309         state.end()
310     }
311 }
312 
/// Returns the names of the cargo features this binary was compiled with.
///
/// Names appear in a fixed order; a feature that is not enabled is simply
/// absent from the list.
pub fn feature_list() -> Vec<String> {
    // (compiled-in?, name) for every feature that is reported.
    const FEATURES: [(bool, &str); 10] = [
        (cfg!(feature = "dbus_api"), "dbus_api"),
        (cfg!(feature = "dhat-heap"), "dhat-heap"),
        (cfg!(feature = "guest_debug"), "guest_debug"),
        (cfg!(feature = "igvm"), "igvm"),
        (cfg!(feature = "io_uring"), "io_uring"),
        (cfg!(feature = "kvm"), "kvm"),
        (cfg!(feature = "mshv"), "mshv"),
        (cfg!(feature = "sev_snp"), "sev_snp"),
        (cfg!(feature = "tdx"), "tdx"),
        (cfg!(feature = "tracing"), "tracing"),
    ];

    FEATURES
        .iter()
        .filter(|&&(enabled, _)| enabled)
        .map(|&(_, name)| name.to_string())
        .collect()
}
337 
338 pub fn start_event_monitor_thread(
339     mut monitor: event_monitor::Monitor,
340     seccomp_action: &SeccompAction,
341     landlock_enable: bool,
342     hypervisor_type: hypervisor::HypervisorType,
343     exit_event: EventFd,
344 ) -> Result<thread::JoinHandle<Result<()>>> {
345     // Retrieve seccomp filter
346     let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
347         .map_err(Error::CreateSeccompFilter)?;
348 
349     thread::Builder::new()
350         .name("event-monitor".to_owned())
351         .spawn(move || {
352             // Apply seccomp filter
353             if !seccomp_filter.is_empty() {
354                 apply_filter(&seccomp_filter)
355                     .map_err(Error::ApplySeccompFilter)
356                     .map_err(|e| {
357                         error!("Error applying seccomp filter: {:?}", e);
358                         exit_event.write(1).ok();
359                         e
360                     })?;
361             }
362             if landlock_enable {
363                 Landlock::new()
364                     .map_err(Error::CreateLandlock)?
365                     .restrict_self()
366                     .map_err(Error::ApplyLandlock)
367                     .map_err(|e| {
368                         error!("Error applying landlock to event monitor thread: {:?}", e);
369                         exit_event.write(1).ok();
370                         e
371                     })?;
372             }
373 
374             std::panic::catch_unwind(AssertUnwindSafe(move || {
375                 while let Ok(event) = monitor.rx.recv() {
376                     let event = Arc::new(event);
377 
378                     if let Some(ref mut file) = monitor.file {
379                         file.write_all(event.as_bytes().as_ref()).ok();
380                         file.write_all(b"\n\n").ok();
381                     }
382 
383                     for tx in monitor.broadcast.iter() {
384                         tx.send(event.clone()).ok();
385                     }
386                 }
387             }))
388             .map_err(|_| {
389                 error!("`event-monitor` thread panicked");
390                 exit_event.write(1).ok();
391             })
392             .ok();
393 
394             Ok(())
395         })
396         .map_err(Error::EventMonitorThreadSpawn)
397 }
398 
399 #[allow(unused_variables)]
400 #[allow(clippy::too_many_arguments)]
401 pub fn start_vmm_thread(
402     vmm_version: VmmVersionInfo,
403     http_path: &Option<String>,
404     http_fd: Option<RawFd>,
405     #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
406     api_event: EventFd,
407     api_sender: Sender<ApiRequest>,
408     api_receiver: Receiver<ApiRequest>,
409     #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
410     #[cfg(feature = "guest_debug")] debug_event: EventFd,
411     #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
412     exit_event: EventFd,
413     seccomp_action: &SeccompAction,
414     hypervisor: Arc<dyn hypervisor::Hypervisor>,
415     landlock_enable: bool,
416 ) -> Result<VmmThreadHandle> {
417     #[cfg(feature = "guest_debug")]
418     let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
419     #[cfg(feature = "guest_debug")]
420     let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
421     #[cfg(feature = "guest_debug")]
422     let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
423     #[cfg(feature = "guest_debug")]
424     let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;
425 
426     let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
427     let hypervisor_type = hypervisor.hypervisor_type();
428 
429     // Retrieve seccomp filter
430     let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
431         .map_err(Error::CreateSeccompFilter)?;
432 
433     let vmm_seccomp_action = seccomp_action.clone();
434     let thread = {
435         let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
436         thread::Builder::new()
437             .name("vmm".to_string())
438             .spawn(move || {
439                 // Apply seccomp filter for VMM thread.
440                 if !vmm_seccomp_filter.is_empty() {
441                     apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
442                 }
443 
444                 let mut vmm = Vmm::new(
445                     vmm_version,
446                     api_event,
447                     #[cfg(feature = "guest_debug")]
448                     debug_event,
449                     #[cfg(feature = "guest_debug")]
450                     vm_debug_event,
451                     vmm_seccomp_action,
452                     hypervisor,
453                     exit_event,
454                 )?;
455 
456                 vmm.setup_signal_handler(landlock_enable)?;
457 
458                 vmm.control_loop(
459                     Rc::new(api_receiver),
460                     #[cfg(feature = "guest_debug")]
461                     Rc::new(gdb_receiver),
462                 )
463             })
464             .map_err(Error::VmmThreadSpawn)?
465     };
466 
467     // The VMM thread is started, we can start the dbus thread
468     // and start serving HTTP requests
469     #[cfg(feature = "dbus_api")]
470     let dbus_shutdown_chs = match dbus_options {
471         Some(opts) => {
472             let (_, chs) = api::start_dbus_thread(
473                 opts,
474                 api_event_clone.try_clone().map_err(Error::EventFdClone)?,
475                 api_sender.clone(),
476                 seccomp_action,
477                 exit_event.try_clone().map_err(Error::EventFdClone)?,
478                 hypervisor_type,
479             )?;
480             Some(chs)
481         }
482         None => None,
483     };
484 
485     let http_api_handle = if let Some(http_path) = http_path {
486         Some(api::start_http_path_thread(
487             http_path,
488             api_event_clone,
489             api_sender,
490             seccomp_action,
491             exit_event,
492             hypervisor_type,
493             landlock_enable,
494         )?)
495     } else if let Some(http_fd) = http_fd {
496         Some(api::start_http_fd_thread(
497             http_fd,
498             api_event_clone,
499             api_sender,
500             seccomp_action,
501             exit_event,
502             hypervisor_type,
503             landlock_enable,
504         )?)
505     } else {
506         None
507     };
508 
509     #[cfg(feature = "guest_debug")]
510     if let Some(debug_path) = debug_path {
511         let target = gdb::GdbStub::new(
512             gdb_sender,
513             gdb_debug_event,
514             gdb_vm_debug_event,
515             gdb_hw_breakpoints,
516         );
517         thread::Builder::new()
518             .name("gdb".to_owned())
519             .spawn(move || gdb::gdb_thread(target, &debug_path))
520             .map_err(Error::GdbThreadSpawn)?;
521     }
522 
523     Ok(VmmThreadHandle {
524         thread_handle: thread,
525         #[cfg(feature = "dbus_api")]
526         dbus_shutdown_chs,
527         http_api_handle,
528     })
529 }
530 
/// Configuration payload of a migration, (de)serialized through serde.
///
/// `Vmm::vm_receive_config` deserializes it from the JSON bytes read off the
/// migration socket.
#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    // Configuration of the VM being migrated; becomes the receiving VMM's
    // `vm_config`.
    vm_config: Arc<Mutex<VmConfig>>,
    // CPUID entries handed to `vm_check_cpuid_compatibility` on the
    // receiving side (KVM on x86_64 only).
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    // Snapshot data passed to `MemoryManager::new` on the receiving side.
    memory_manager_data: MemoryManagerSnapshotData,
}
538 
/// Pair of version strings describing the VMM.
#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    /// Creates a `VmmVersionInfo` holding owned copies of both strings.
    pub fn new(build_version: &str, version: &str) -> Self {
        let build_version = String::from(build_version);
        let version = String::from(version);

        Self {
            build_version,
            version,
        }
    }
}
553 
/// Handles returned by `start_vmm_thread`.
pub struct VmmThreadHandle {
    /// Join handle of the `vmm` thread; it yields the result of the control loop.
    pub thread_handle: thread::JoinHandle<Result<()>>,
    /// Shutdown channels of the D-Bus API thread, `None` when no D-Bus options
    /// were supplied.
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    /// Handle of the HTTP API thread, `None` when neither a socket path nor a
    /// file descriptor was supplied.
    pub http_api_handle: Option<HttpApiHandle>,
}
560 
/// State owned by the `vmm` thread.
pub struct Vmm {
    // Epoll context the event fds below are registered with in `Vmm::new`.
    epoll: EpollContext,
    // Registered as `EpollDispatch::Exit`; also written by the signal handler
    // thread on SIGTERM/SIGINT.
    exit_evt: EventFd,
    // Registered as `EpollDispatch::Reset`.
    reset_evt: EventFd,
    // Registered as `EpollDispatch::Api`.
    api_evt: EventFd,
    // Registered as `EpollDispatch::Debug`.
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    // Cloned and handed to the VM when it is created from a migration.
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    version: VmmVersionInfo,
    // `None` until a VM exists (initial value set by `Vmm::new`).
    vm: Option<Vm>,
    // `None` until a VM configuration is known (initial value set by `Vmm::new`).
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    // Action used when building the seccomp filters of helper threads.
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    // Registered as `EpollDispatch::ActivateVirtioDevices`.
    activate_evt: EventFd,
    // Handle of the signal iterator, set by `setup_signal_handler`.
    signals: Option<Handle>,
    // Join handles of helper threads (the signal handler thread is pushed here).
    threads: Vec<thread::JoinHandle<()>>,
    // Terminal settings the signal handler restores before exiting the process
    // when it cannot signal `exit_evt`.
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    console_resize_pipe: Option<Arc<File>>,
    // Filled in by `pre_create_console_devices` when a migration config is received.
    console_info: Option<ConsoleInfo>,
}
582 
583 impl Vmm {
    /// Signals for which the VMM installs its handler (see
    /// `setup_signal_handler`) and which the handler thread unblocks.
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];
585 
586     fn signal_handler(
587         mut signals: Signals,
588         original_termios_opt: Arc<Mutex<Option<termios>>>,
589         exit_evt: &EventFd,
590     ) {
591         for sig in &Self::HANDLED_SIGNALS {
592             unblock_signal(*sig).unwrap();
593         }
594 
595         for signal in signals.forever() {
596             match signal {
597                 SIGTERM | SIGINT => {
598                     if exit_evt.write(1).is_err() {
599                         // Resetting the terminal is usually done as the VMM exits
600                         if let Ok(lock) = original_termios_opt.lock() {
601                             if let Some(termios) = *lock {
602                                 // SAFETY: FFI call
603                                 let _ = unsafe {
604                                     tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
605                                 };
606                             }
607                         } else {
608                             warn!("Failed to lock original termios");
609                         }
610 
611                         std::process::exit(1);
612                     }
613                 }
614                 _ => (),
615             }
616         }
617     }
618 
619     fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
620         let signals = Signals::new(Self::HANDLED_SIGNALS);
621         match signals {
622             Ok(signals) => {
623                 self.signals = Some(signals.handle());
624                 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
625                 let original_termios_opt = Arc::clone(&self.original_termios_opt);
626 
627                 let signal_handler_seccomp_filter = get_seccomp_filter(
628                     &self.seccomp_action,
629                     Thread::SignalHandler,
630                     self.hypervisor.hypervisor_type(),
631                 )
632                 .map_err(Error::CreateSeccompFilter)?;
633                 self.threads.push(
634                     thread::Builder::new()
635                         .name("vmm_signal_handler".to_string())
636                         .spawn(move || {
637                             if !signal_handler_seccomp_filter.is_empty() {
638                                 if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
639                                     .map_err(Error::ApplySeccompFilter)
640                                 {
641                                     error!("Error applying seccomp filter: {:?}", e);
642                                     exit_evt.write(1).ok();
643                                     return;
644                                 }
645                             }
646                             if landlock_enable{
647                                 match Landlock::new() {
648                                     Ok(landlock) => {
649                                         let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| {
650                                             error!("Error applying Landlock to signal handler thread: {:?}", e);
651                                             exit_evt.write(1).ok();
652                                         });
653                                     }
654                                     Err(e) => {
655                                         error!("Error creating Landlock object: {:?}", e);
656                                         exit_evt.write(1).ok();
657                                     }
658                                 };
659                             }
660 
661                             std::panic::catch_unwind(AssertUnwindSafe(|| {
662                                 Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
663                             }))
664                             .map_err(|_| {
665                                 error!("vmm signal_handler thread panicked");
666                                 exit_evt.write(1).ok()
667                             })
668                             .ok();
669                         })
670                         .map_err(Error::SignalHandlerSpawn)?,
671                 );
672             }
673             Err(e) => error!("Signal not found {}", e),
674         }
675         Ok(())
676     }
677 
678     #[allow(clippy::too_many_arguments)]
679     fn new(
680         vmm_version: VmmVersionInfo,
681         api_evt: EventFd,
682         #[cfg(feature = "guest_debug")] debug_evt: EventFd,
683         #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
684         seccomp_action: SeccompAction,
685         hypervisor: Arc<dyn hypervisor::Hypervisor>,
686         exit_evt: EventFd,
687     ) -> Result<Self> {
688         let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
689         let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
690         let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
691 
692         epoll
693             .add_event(&exit_evt, EpollDispatch::Exit)
694             .map_err(Error::Epoll)?;
695 
696         epoll
697             .add_event(&reset_evt, EpollDispatch::Reset)
698             .map_err(Error::Epoll)?;
699 
700         epoll
701             .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
702             .map_err(Error::Epoll)?;
703 
704         epoll
705             .add_event(&api_evt, EpollDispatch::Api)
706             .map_err(Error::Epoll)?;
707 
708         #[cfg(feature = "guest_debug")]
709         epoll
710             .add_event(&debug_evt, EpollDispatch::Debug)
711             .map_err(Error::Epoll)?;
712 
713         Ok(Vmm {
714             epoll,
715             exit_evt,
716             reset_evt,
717             api_evt,
718             #[cfg(feature = "guest_debug")]
719             debug_evt,
720             #[cfg(feature = "guest_debug")]
721             vm_debug_evt,
722             version: vmm_version,
723             vm: None,
724             vm_config: None,
725             seccomp_action,
726             hypervisor,
727             activate_evt,
728             signals: None,
729             threads: vec![],
730             original_termios_opt: Arc::new(Mutex::new(None)),
731             console_resize_pipe: None,
732             console_info: None,
733         })
734     }
735 
736     fn vm_receive_config<T>(
737         &mut self,
738         req: &Request,
739         socket: &mut T,
740         existing_memory_files: Option<HashMap<u32, File>>,
741     ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
742     where
743         T: Read + Write,
744     {
745         // Read in config data along with memory manager data
746         let mut data: Vec<u8> = Vec::new();
747         data.resize_with(req.length() as usize, Default::default);
748         socket
749             .read_exact(&mut data)
750             .map_err(MigratableError::MigrateSocket)?;
751 
752         let vm_migration_config: VmMigrationConfig =
753             serde_json::from_slice(&data).map_err(|e| {
754                 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
755             })?;
756 
757         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
758         self.vm_check_cpuid_compatibility(
759             &vm_migration_config.vm_config,
760             &vm_migration_config.common_cpuid,
761         )?;
762 
763         let config = vm_migration_config.vm_config.clone();
764         self.vm_config = Some(vm_migration_config.vm_config);
765         self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
766             MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
767         })?);
768 
769         if self
770             .vm_config
771             .as_ref()
772             .unwrap()
773             .lock()
774             .unwrap()
775             .landlock_enable
776         {
777             apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
778                 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
779             })?;
780         }
781 
782         let vm = Vm::create_hypervisor_vm(
783             &self.hypervisor,
784             #[cfg(feature = "tdx")]
785             false,
786             #[cfg(feature = "sev_snp")]
787             false,
788         )
789         .map_err(|e| {
790             MigratableError::MigrateReceive(anyhow!(
791                 "Error creating hypervisor VM from snapshot: {:?}",
792                 e
793             ))
794         })?;
795 
796         let phys_bits =
797             vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);
798 
799         let memory_manager = MemoryManager::new(
800             vm,
801             &config.lock().unwrap().memory.clone(),
802             None,
803             phys_bits,
804             #[cfg(feature = "tdx")]
805             false,
806             Some(&vm_migration_config.memory_manager_data),
807             existing_memory_files,
808             #[cfg(target_arch = "x86_64")]
809             None,
810         )
811         .map_err(|e| {
812             MigratableError::MigrateReceive(anyhow!(
813                 "Error creating MemoryManager from snapshot: {:?}",
814                 e
815             ))
816         })?;
817 
818         Response::ok().write_to(socket)?;
819 
820         Ok(memory_manager)
821     }
822 
823     fn vm_receive_state<T>(
824         &mut self,
825         req: &Request,
826         socket: &mut T,
827         mm: Arc<Mutex<MemoryManager>>,
828     ) -> std::result::Result<(), MigratableError>
829     where
830         T: Read + Write,
831     {
832         // Read in state data
833         let mut data: Vec<u8> = Vec::new();
834         data.resize_with(req.length() as usize, Default::default);
835         socket
836             .read_exact(&mut data)
837             .map_err(MigratableError::MigrateSocket)?;
838         let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
839             MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
840         })?;
841 
842         let exit_evt = self.exit_evt.try_clone().map_err(|e| {
843             MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
844         })?;
845         let reset_evt = self.reset_evt.try_clone().map_err(|e| {
846             MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
847         })?;
848         #[cfg(feature = "guest_debug")]
849         let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
850             MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
851         })?;
852         let activate_evt = self.activate_evt.try_clone().map_err(|e| {
853             MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
854         })?;
855 
856         let timestamp = Instant::now();
857         let hypervisor_vm = mm.lock().unwrap().vm.clone();
858         let mut vm = Vm::new_from_memory_manager(
859             self.vm_config.clone().unwrap(),
860             mm,
861             hypervisor_vm,
862             exit_evt,
863             reset_evt,
864             #[cfg(feature = "guest_debug")]
865             debug_evt,
866             &self.seccomp_action,
867             self.hypervisor.clone(),
868             activate_evt,
869             timestamp,
870             self.console_info.clone(),
871             self.console_resize_pipe.as_ref().map(Arc::clone),
872             Arc::clone(&self.original_termios_opt),
873             Some(snapshot),
874         )
875         .map_err(|e| {
876             MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
877         })?;
878 
879         // Create VM
880         vm.restore().map_err(|e| {
881             Response::error().write_to(socket).ok();
882             MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
883         })?;
884         self.vm = Some(vm);
885 
886         Response::ok().write_to(socket)?;
887 
888         Ok(())
889     }
890 
891     fn vm_receive_memory<T>(
892         &mut self,
893         req: &Request,
894         socket: &mut T,
895         memory_manager: &mut MemoryManager,
896     ) -> std::result::Result<(), MigratableError>
897     where
898         T: Read + ReadVolatile + Write,
899     {
900         // Read table
901         let table = MemoryRangeTable::read_from(socket, req.length())?;
902 
903         // And then read the memory itself
904         memory_manager
905             .receive_memory_regions(&table, socket)
906             .inspect_err(|_| {
907                 Response::error().write_to(socket).ok();
908             })?;
909         Response::ok().write_to(socket)?;
910         Ok(())
911     }
912 
913     fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
914         url.strip_prefix("unix:")
915             .ok_or_else(|| {
916                 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
917             })
918             .map(|s| s.into())
919     }
920 
921     // Returns true if there were dirty pages to send
922     fn vm_maybe_send_dirty_pages<T>(
923         vm: &mut Vm,
924         socket: &mut T,
925     ) -> result::Result<bool, MigratableError>
926     where
927         T: Read + Write + WriteVolatile,
928     {
929         // Send (dirty) memory table
930         let table = vm.dirty_log()?;
931 
932         // But if there are no regions go straight to pause
933         if table.regions().is_empty() {
934             return Ok(false);
935         }
936 
937         Request::memory(table.length()).write_to(socket).unwrap();
938         table.write_to(socket)?;
939         // And then the memory itself
940         vm.send_memory_regions(&table, socket)?;
941         Response::read_from(socket)?.ok_or_abandon(
942             socket,
943             MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
944         )?;
945 
946         Ok(true)
947     }
948 
949     fn send_migration(
950         vm: &mut Vm,
951         #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
952             dyn hypervisor::Hypervisor,
953         >,
954         send_data_migration: VmSendMigrationData,
955     ) -> result::Result<(), MigratableError> {
956         let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
957         let mut socket = UnixStream::connect(path).map_err(|e| {
958             MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
959         })?;
960 
961         // Start the migration
962         Request::start().write_to(&mut socket)?;
963         Response::read_from(&mut socket)?.ok_or_abandon(
964             &mut socket,
965             MigratableError::MigrateSend(anyhow!("Error starting migration")),
966         )?;
967 
968         // Send config
969         let vm_config = vm.get_config();
970         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
971         let common_cpuid = {
972             #[cfg(feature = "tdx")]
973             if vm_config.lock().unwrap().is_tdx_enabled() {
974                 return Err(MigratableError::MigrateSend(anyhow!(
975                     "Live Migration is not supported when TDX is enabled"
976                 )));
977             };
978 
979             let amx = vm_config.lock().unwrap().cpus.features.amx;
980             let phys_bits =
981                 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
982             arch::generate_common_cpuid(
983                 &hypervisor,
984                 &arch::CpuidConfig {
985                     sgx_epc_sections: None,
986                     phys_bits,
987                     kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
988                     #[cfg(feature = "tdx")]
989                     tdx: false,
990                     amx,
991                 },
992             )
993             .map_err(|e| {
994                 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e))
995             })?
996         };
997 
998         if send_data_migration.local {
999             vm.send_memory_fds(&mut socket)?;
1000         }
1001 
1002         let vm_migration_config = VmMigrationConfig {
1003             vm_config,
1004             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1005             common_cpuid,
1006             memory_manager_data: vm.memory_manager_data(),
1007         };
1008         let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
1009         Request::config(config_data.len() as u64).write_to(&mut socket)?;
1010         socket
1011             .write_all(&config_data)
1012             .map_err(MigratableError::MigrateSocket)?;
1013         Response::read_from(&mut socket)?.ok_or_abandon(
1014             &mut socket,
1015             MigratableError::MigrateSend(anyhow!("Error during config migration")),
1016         )?;
1017 
1018         // Let every Migratable object know about the migration being started.
1019         vm.start_migration()?;
1020 
1021         if send_data_migration.local {
1022             // Now pause VM
1023             vm.pause()?;
1024         } else {
1025             // Start logging dirty pages
1026             vm.start_dirty_log()?;
1027 
1028             // Send memory table
1029             let table = vm.memory_range_table()?;
1030             Request::memory(table.length())
1031                 .write_to(&mut socket)
1032                 .unwrap();
1033             table.write_to(&mut socket)?;
1034             // And then the memory itself
1035             vm.send_memory_regions(&table, &mut socket)?;
1036             Response::read_from(&mut socket)?.ok_or_abandon(
1037                 &mut socket,
1038                 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
1039             )?;
1040 
1041             // Try at most 5 passes of dirty memory sending
1042             const MAX_DIRTY_MIGRATIONS: usize = 5;
1043             for i in 0..MAX_DIRTY_MIGRATIONS {
1044                 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS);
1045                 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
1046                     break;
1047                 }
1048             }
1049 
1050             // Now pause VM
1051             vm.pause()?;
1052 
1053             // Send last batch of dirty pages
1054             Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;
1055 
1056             // Stop logging dirty pages
1057             vm.stop_dirty_log()?;
1058         }
1059         // Capture snapshot and send it
1060         let vm_snapshot = vm.snapshot()?;
1061         let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
1062         Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
1063         socket
1064             .write_all(&snapshot_data)
1065             .map_err(MigratableError::MigrateSocket)?;
1066         Response::read_from(&mut socket)?.ok_or_abandon(
1067             &mut socket,
1068             MigratableError::MigrateSend(anyhow!("Error during state migration")),
1069         )?;
1070         // Complete the migration
1071         Request::complete().write_to(&mut socket)?;
1072         Response::read_from(&mut socket)?.ok_or_abandon(
1073             &mut socket,
1074             MigratableError::MigrateSend(anyhow!("Error completing migration")),
1075         )?;
1076 
1077         info!("Migration complete");
1078 
1079         // Let every Migratable object know about the migration being complete
1080         vm.complete_migration()
1081     }
1082 
1083     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1084     fn vm_check_cpuid_compatibility(
1085         &self,
1086         src_vm_config: &Arc<Mutex<VmConfig>>,
1087         src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
1088     ) -> result::Result<(), MigratableError> {
1089         #[cfg(feature = "tdx")]
1090         if src_vm_config.lock().unwrap().is_tdx_enabled() {
1091             return Err(MigratableError::MigrateReceive(anyhow!(
1092                 "Live Migration is not supported when TDX is enabled"
1093             )));
1094         };
1095 
1096         // We check the `CPUID` compatibility of between the source vm and destination, which is
1097         // mostly about feature compatibility and "topology/sgx" leaves are not relevant.
1098         let dest_cpuid = &{
1099             let vm_config = &src_vm_config.lock().unwrap();
1100 
1101             let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
1102             arch::generate_common_cpuid(
1103                 &self.hypervisor.clone(),
1104                 &arch::CpuidConfig {
1105                     sgx_epc_sections: None,
1106                     phys_bits,
1107                     kvm_hyperv: vm_config.cpus.kvm_hyperv,
1108                     #[cfg(feature = "tdx")]
1109                     tdx: false,
1110                     amx: vm_config.cpus.features.amx,
1111                 },
1112             )
1113             .map_err(|e| {
1114                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
1115             })?
1116         };
1117         arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
1118             MigratableError::MigrateReceive(anyhow!(
1119                 "Error checking cpu feature compatibility': {:?}",
1120                 e
1121             ))
1122         })
1123     }
1124 
    /// Runs the VMM event loop until the VMM is asked to stop.
    ///
    /// The loop blocks in `epoll::wait` on `self.epoll` and dispatches every
    /// returned event according to its `EpollDispatch` value:
    /// - `Exit`: shuts the VMM down and leaves the loop;
    /// - `Reset`: reboots the VM;
    /// - `ActivateVirtioDevices`: activates pending virtio devices if a VM exists;
    /// - `Api`: runs the queued API requests from `api_receiver`, leaving the
    ///   loop when one of them returns `true`;
    /// - `Debug`: with the `guest_debug` feature, forwards the queued GDB
    ///   requests from `gdb_receiver` to the VM and sends the responses back;
    ///   without the feature, the event is ignored.
    ///
    /// Once the loop is left, the signal handler is closed (if any) and all the
    /// threads in `self.threads` are joined.
    ///
    /// # Errors
    ///
    /// Any failure while waiting on epoll (other than an interruption, which is
    /// retried), reading an event fd, handling an event or joining a thread is
    /// returned and ends the loop immediately.
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        // Maximum number of events fetched by a single epoll::wait() call.
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            // A timeout of -1 is passed to epoll::wait().
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            // Only the first `num_events` entries were filled in by this wait.
            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        // Note: the event is only consumed when a VM exists.
                        if let Some(ref vm) = self.vm {
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events: the value read from the event fd
                        // is used as the number of requests to receive.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            // A request returning `true` asks the loop to stop.
                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the GDB receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            // Without a VM the request is answered with an error.
                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
1234 }
1235 
1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
1237     vm_config.lock().unwrap().apply_landlock()?;
1238     Ok(())
1239 }
1240 
1241 impl RequestHandler for Vmm {
1242     fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> {
1243         // We only store the passed VM config.
1244         // The VM will be created when being asked to boot it.
1245         if self.vm_config.is_none() {
1246             self.vm_config = Some(config);
1247             self.console_info =
1248                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1249 
1250             if self
1251                 .vm_config
1252                 .as_ref()
1253                 .unwrap()
1254                 .lock()
1255                 .unwrap()
1256                 .landlock_enable
1257             {
1258                 apply_landlock(self.vm_config.as_ref().unwrap().clone())
1259                     .map_err(VmError::ApplyLandlock)?;
1260             }
1261             Ok(())
1262         } else {
1263             Err(VmError::VmAlreadyCreated)
1264         }
1265     }
1266 
1267     fn vm_boot(&mut self) -> result::Result<(), VmError> {
1268         tracer::start();
1269         info!("Booting VM");
1270         event!("vm", "booting");
1271         let r = {
1272             trace_scoped!("vm_boot");
1273             // If we don't have a config, we cannot boot a VM.
1274             if self.vm_config.is_none() {
1275                 return Err(VmError::VmMissingConfig);
1276             };
1277 
1278             // console_info is set to None in vm_shutdown. re-populate here if empty
1279             if self.console_info.is_none() {
1280                 self.console_info =
1281                     Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1282             }
1283 
1284             // Create a new VM if we don't have one yet.
1285             if self.vm.is_none() {
1286                 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1287                 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1288                 #[cfg(feature = "guest_debug")]
1289                 let vm_debug_evt = self
1290                     .vm_debug_evt
1291                     .try_clone()
1292                     .map_err(VmError::EventFdClone)?;
1293                 let activate_evt = self
1294                     .activate_evt
1295                     .try_clone()
1296                     .map_err(VmError::EventFdClone)?;
1297 
1298                 if let Some(ref vm_config) = self.vm_config {
1299                     let vm = Vm::new(
1300                         Arc::clone(vm_config),
1301                         exit_evt,
1302                         reset_evt,
1303                         #[cfg(feature = "guest_debug")]
1304                         vm_debug_evt,
1305                         &self.seccomp_action,
1306                         self.hypervisor.clone(),
1307                         activate_evt,
1308                         self.console_info.clone(),
1309                         self.console_resize_pipe.as_ref().map(Arc::clone),
1310                         Arc::clone(&self.original_termios_opt),
1311                         None,
1312                         None,
1313                         None,
1314                     )?;
1315 
1316                     self.vm = Some(vm);
1317                 }
1318             }
1319 
1320             // Now we can boot the VM.
1321             if let Some(ref mut vm) = self.vm {
1322                 vm.boot()
1323             } else {
1324                 Err(VmError::VmNotCreated)
1325             }
1326         };
1327         tracer::end();
1328         if r.is_ok() {
1329             event!("vm", "booted");
1330         }
1331         r
1332     }
1333 
1334     fn vm_pause(&mut self) -> result::Result<(), VmError> {
1335         if let Some(ref mut vm) = self.vm {
1336             vm.pause().map_err(VmError::Pause)
1337         } else {
1338             Err(VmError::VmNotRunning)
1339         }
1340     }
1341 
1342     fn vm_resume(&mut self) -> result::Result<(), VmError> {
1343         if let Some(ref mut vm) = self.vm {
1344             vm.resume().map_err(VmError::Resume)
1345         } else {
1346             Err(VmError::VmNotRunning)
1347         }
1348     }
1349 
1350     fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1351         if let Some(ref mut vm) = self.vm {
1352             // Drain console_info so that FDs are not reused
1353             let _ = self.console_info.take();
1354             vm.snapshot()
1355                 .map_err(VmError::Snapshot)
1356                 .and_then(|snapshot| {
1357                     vm.send(&snapshot, destination_url)
1358                         .map_err(VmError::SnapshotSend)
1359                 })
1360         } else {
1361             Err(VmError::VmNotRunning)
1362         }
1363     }
1364 
1365     fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
1366         if self.vm.is_some() || self.vm_config.is_some() {
1367             return Err(VmError::VmAlreadyCreated);
1368         }
1369 
1370         let source_url = restore_cfg.source_url.as_path().to_str();
1371         if source_url.is_none() {
1372             return Err(VmError::InvalidRestoreSourceUrl);
1373         }
1374         // Safe to unwrap as we checked it was Some(&str).
1375         let source_url = source_url.unwrap();
1376 
1377         let vm_config = Arc::new(Mutex::new(
1378             recv_vm_config(source_url).map_err(VmError::Restore)?,
1379         ));
1380         restore_cfg
1381             .validate(&vm_config.lock().unwrap().clone())
1382             .map_err(VmError::ConfigValidation)?;
1383 
1384         // Update VM's net configurations with new fds received for restore operation
1385         if let (Some(restored_nets), Some(vm_net_configs)) =
1386             (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
1387         {
1388             for net in restored_nets.iter() {
1389                 for net_config in vm_net_configs.iter_mut() {
1390                     // update only if the net dev is backed by FDs
1391                     if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
1392                         net_config.fds.clone_from(&net.fds);
1393                     }
1394                 }
1395             }
1396         }
1397 
1398         let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
1399         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1400         let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;
1401 
1402         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
1403         self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
1404             .map_err(VmError::Restore)?;
1405 
1406         self.vm_config = Some(Arc::clone(&vm_config));
1407 
1408         // console_info is set to None in vm_snapshot. re-populate here if empty
1409         if self.console_info.is_none() {
1410             self.console_info =
1411                 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1412         }
1413 
1414         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1415         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1416         #[cfg(feature = "guest_debug")]
1417         let debug_evt = self
1418             .vm_debug_evt
1419             .try_clone()
1420             .map_err(VmError::EventFdClone)?;
1421         let activate_evt = self
1422             .activate_evt
1423             .try_clone()
1424             .map_err(VmError::EventFdClone)?;
1425 
1426         let vm = Vm::new(
1427             vm_config,
1428             exit_evt,
1429             reset_evt,
1430             #[cfg(feature = "guest_debug")]
1431             debug_evt,
1432             &self.seccomp_action,
1433             self.hypervisor.clone(),
1434             activate_evt,
1435             self.console_info.clone(),
1436             self.console_resize_pipe.as_ref().map(Arc::clone),
1437             Arc::clone(&self.original_termios_opt),
1438             Some(snapshot),
1439             Some(source_url),
1440             Some(restore_cfg.prefault),
1441         )?;
1442         self.vm = Some(vm);
1443 
1444         if self
1445             .vm_config
1446             .as_ref()
1447             .unwrap()
1448             .lock()
1449             .unwrap()
1450             .landlock_enable
1451         {
1452             apply_landlock(self.vm_config.as_ref().unwrap().clone())
1453                 .map_err(VmError::ApplyLandlock)?;
1454         }
1455 
1456         // Now we can restore the rest of the VM.
1457         if let Some(ref mut vm) = self.vm {
1458             vm.restore()
1459         } else {
1460             Err(VmError::VmNotCreated)
1461         }
1462     }
1463 
1464     #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
1465     fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
1466         if let Some(ref mut vm) = self.vm {
1467             vm.coredump(destination_url).map_err(VmError::Coredump)
1468         } else {
1469             Err(VmError::VmNotRunning)
1470         }
1471     }
1472 
1473     fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
1474         let r = if let Some(ref mut vm) = self.vm.take() {
1475             // Drain console_info so that the FDs are not reused
1476             let _ = self.console_info.take();
1477             vm.shutdown()
1478         } else {
1479             Err(VmError::VmNotRunning)
1480         };
1481 
1482         if r.is_ok() {
1483             event!("vm", "shutdown");
1484         }
1485 
1486         r
1487     }
1488 
1489     fn vm_reboot(&mut self) -> result::Result<(), VmError> {
1490         event!("vm", "rebooting");
1491 
1492         // First we stop the current VM
1493         let config = if let Some(mut vm) = self.vm.take() {
1494             let config = vm.get_config();
1495             vm.shutdown()?;
1496             config
1497         } else {
1498             return Err(VmError::VmNotCreated);
1499         };
1500 
1501         // vm.shutdown() closes all the console devices, so set console_info to None
1502         // so that the closed FD #s are not reused.
1503         let _ = self.console_info.take();
1504 
1505         let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
1506         let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
1507         #[cfg(feature = "guest_debug")]
1508         let debug_evt = self
1509             .vm_debug_evt
1510             .try_clone()
1511             .map_err(VmError::EventFdClone)?;
1512         let activate_evt = self
1513             .activate_evt
1514             .try_clone()
1515             .map_err(VmError::EventFdClone)?;
1516 
1517         // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
1518         // an event sitting in the shared reset_evt. Without doing this we get very early reboots
1519         // during the boot process.
1520         if self.reset_evt.read().is_ok() {
1521             warn!("Spurious second reset event received. Ignoring.");
1522         }
1523 
1524         self.console_info =
1525             Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
1526 
1527         // Then we create the new VM
1528         let mut vm = Vm::new(
1529             config,
1530             exit_evt,
1531             reset_evt,
1532             #[cfg(feature = "guest_debug")]
1533             debug_evt,
1534             &self.seccomp_action,
1535             self.hypervisor.clone(),
1536             activate_evt,
1537             self.console_info.clone(),
1538             self.console_resize_pipe.as_ref().map(Arc::clone),
1539             Arc::clone(&self.original_termios_opt),
1540             None,
1541             None,
1542             None,
1543         )?;
1544 
1545         // And we boot it
1546         vm.boot()?;
1547 
1548         self.vm = Some(vm);
1549 
1550         event!("vm", "rebooted");
1551 
1552         Ok(())
1553     }
1554 
1555     fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
1556         match &self.vm_config {
1557             Some(config) => {
1558                 let state = match &self.vm {
1559                     Some(vm) => vm.get_state()?,
1560                     None => VmState::Created,
1561                 };
1562 
1563                 let config = Arc::clone(config);
1564 
1565                 let mut memory_actual_size = config.lock().unwrap().memory.total_size();
1566                 if let Some(vm) = &self.vm {
1567                     memory_actual_size -= vm.balloon_size();
1568                 }
1569 
1570                 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree());
1571 
1572                 Ok(VmInfoResponse {
1573                     config,
1574                     state,
1575                     memory_actual_size,
1576                     device_tree,
1577                 })
1578             }
1579             None => Err(VmError::VmNotCreated),
1580         }
1581     }
1582 
1583     fn vmm_ping(&self) -> VmmPingResponse {
1584         let VmmVersionInfo {
1585             build_version,
1586             version,
1587         } = self.version.clone();
1588 
1589         VmmPingResponse {
1590             build_version,
1591             version,
1592             pid: std::process::id() as i64,
1593             features: feature_list(),
1594         }
1595     }
1596 
1597     fn vm_delete(&mut self) -> result::Result<(), VmError> {
1598         if self.vm_config.is_none() {
1599             return Ok(());
1600         }
1601 
1602         // If a VM is booted, we first try to shut it down.
1603         if self.vm.is_some() {
1604             self.vm_shutdown()?;
1605         }
1606 
1607         self.vm_config = None;
1608 
1609         event!("vm", "deleted");
1610 
1611         Ok(())
1612     }
1613 
1614     fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
1615         self.vm_delete()?;
1616         event!("vmm", "shutdown");
1617         Ok(())
1618     }
1619 
1620     fn vm_resize(
1621         &mut self,
1622         desired_vcpus: Option<u8>,
1623         desired_ram: Option<u64>,
1624         desired_balloon: Option<u64>,
1625     ) -> result::Result<(), VmError> {
1626         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1627 
1628         if let Some(ref mut vm) = self.vm {
1629             if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
1630                 error!("Error when resizing VM: {:?}", e);
1631                 Err(e)
1632             } else {
1633                 Ok(())
1634             }
1635         } else {
1636             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1637             if let Some(desired_vcpus) = desired_vcpus {
1638                 config.cpus.boot_vcpus = desired_vcpus;
1639             }
1640             if let Some(desired_ram) = desired_ram {
1641                 config.memory.size = desired_ram;
1642             }
1643             if let Some(desired_balloon) = desired_balloon {
1644                 if let Some(balloon_config) = &mut config.balloon {
1645                     balloon_config.size = desired_balloon;
1646                 }
1647             }
1648             Ok(())
1649         }
1650     }
1651 
1652     fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
1653         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1654 
1655         if let Some(ref mut vm) = self.vm {
1656             if let Err(e) = vm.resize_zone(id, desired_ram) {
1657                 error!("Error when resizing VM: {:?}", e);
1658                 Err(e)
1659             } else {
1660                 Ok(())
1661             }
1662         } else {
1663             // Update VmConfig by setting the new desired ram.
1664             let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;
1665 
1666             if let Some(zones) = &mut memory_config.zones {
1667                 for zone in zones.iter_mut() {
1668                     if zone.id == id {
1669                         zone.size = desired_ram;
1670                         return Ok(());
1671                     }
1672                 }
1673             }
1674 
1675             error!("Could not find the memory zone {} for the resize", id);
1676             Err(VmError::ResizeZone)
1677         }
1678     }
1679 
1680     fn vm_add_device(
1681         &mut self,
1682         device_cfg: DeviceConfig,
1683     ) -> result::Result<Option<Vec<u8>>, VmError> {
1684         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1685 
1686         {
1687             // Validate the configuration change in a cloned configuration
1688             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1689             add_to_config(&mut config.devices, device_cfg.clone());
1690             config.validate().map_err(VmError::ConfigValidation)?;
1691         }
1692 
1693         if let Some(ref mut vm) = self.vm {
1694             let info = vm.add_device(device_cfg).map_err(|e| {
1695                 error!("Error when adding new device to the VM: {:?}", e);
1696                 e
1697             })?;
1698             serde_json::to_vec(&info)
1699                 .map(Some)
1700                 .map_err(VmError::SerializeJson)
1701         } else {
1702             // Update VmConfig by adding the new device.
1703             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1704             add_to_config(&mut config.devices, device_cfg);
1705             Ok(None)
1706         }
1707     }
1708 
1709     fn vm_add_user_device(
1710         &mut self,
1711         device_cfg: UserDeviceConfig,
1712     ) -> result::Result<Option<Vec<u8>>, VmError> {
1713         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1714 
1715         {
1716             // Validate the configuration change in a cloned configuration
1717             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1718             add_to_config(&mut config.user_devices, device_cfg.clone());
1719             config.validate().map_err(VmError::ConfigValidation)?;
1720         }
1721 
1722         if let Some(ref mut vm) = self.vm {
1723             let info = vm.add_user_device(device_cfg).map_err(|e| {
1724                 error!("Error when adding new user device to the VM: {:?}", e);
1725                 e
1726             })?;
1727             serde_json::to_vec(&info)
1728                 .map(Some)
1729                 .map_err(VmError::SerializeJson)
1730         } else {
1731             // Update VmConfig by adding the new device.
1732             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1733             add_to_config(&mut config.user_devices, device_cfg);
1734             Ok(None)
1735         }
1736     }
1737 
1738     fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
1739         if let Some(ref mut vm) = self.vm {
1740             if let Err(e) = vm.remove_device(id) {
1741                 error!("Error when removing device from the VM: {:?}", e);
1742                 Err(e)
1743             } else {
1744                 Ok(())
1745             }
1746         } else if let Some(ref config) = self.vm_config {
1747             let mut config = config.lock().unwrap();
1748             if config.remove_device(&id) {
1749                 Ok(())
1750             } else {
1751                 Err(VmError::NoDeviceToRemove(id))
1752             }
1753         } else {
1754             Err(VmError::VmNotCreated)
1755         }
1756     }
1757 
1758     fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1759         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1760 
1761         {
1762             // Validate the configuration change in a cloned configuration
1763             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1764             add_to_config(&mut config.disks, disk_cfg.clone());
1765             config.validate().map_err(VmError::ConfigValidation)?;
1766         }
1767 
1768         if let Some(ref mut vm) = self.vm {
1769             let info = vm.add_disk(disk_cfg).map_err(|e| {
1770                 error!("Error when adding new disk to the VM: {:?}", e);
1771                 e
1772             })?;
1773             serde_json::to_vec(&info)
1774                 .map(Some)
1775                 .map_err(VmError::SerializeJson)
1776         } else {
1777             // Update VmConfig by adding the new device.
1778             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1779             add_to_config(&mut config.disks, disk_cfg);
1780             Ok(None)
1781         }
1782     }
1783 
1784     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1785         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1786 
1787         {
1788             // Validate the configuration change in a cloned configuration
1789             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1790             add_to_config(&mut config.fs, fs_cfg.clone());
1791             config.validate().map_err(VmError::ConfigValidation)?;
1792         }
1793 
1794         if let Some(ref mut vm) = self.vm {
1795             let info = vm.add_fs(fs_cfg).map_err(|e| {
1796                 error!("Error when adding new fs to the VM: {:?}", e);
1797                 e
1798             })?;
1799             serde_json::to_vec(&info)
1800                 .map(Some)
1801                 .map_err(VmError::SerializeJson)
1802         } else {
1803             // Update VmConfig by adding the new device.
1804             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1805             add_to_config(&mut config.fs, fs_cfg);
1806             Ok(None)
1807         }
1808     }
1809 
1810     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1811         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1812 
1813         {
1814             // Validate the configuration change in a cloned configuration
1815             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1816             add_to_config(&mut config.pmem, pmem_cfg.clone());
1817             config.validate().map_err(VmError::ConfigValidation)?;
1818         }
1819 
1820         if let Some(ref mut vm) = self.vm {
1821             let info = vm.add_pmem(pmem_cfg).map_err(|e| {
1822                 error!("Error when adding new pmem device to the VM: {:?}", e);
1823                 e
1824             })?;
1825             serde_json::to_vec(&info)
1826                 .map(Some)
1827                 .map_err(VmError::SerializeJson)
1828         } else {
1829             // Update VmConfig by adding the new device.
1830             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1831             add_to_config(&mut config.pmem, pmem_cfg);
1832             Ok(None)
1833         }
1834     }
1835 
1836     fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1837         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1838 
1839         {
1840             // Validate the configuration change in a cloned configuration
1841             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1842             add_to_config(&mut config.net, net_cfg.clone());
1843             config.validate().map_err(VmError::ConfigValidation)?;
1844         }
1845 
1846         if let Some(ref mut vm) = self.vm {
1847             let info = vm.add_net(net_cfg).map_err(|e| {
1848                 error!("Error when adding new network device to the VM: {:?}", e);
1849                 e
1850             })?;
1851             serde_json::to_vec(&info)
1852                 .map(Some)
1853                 .map_err(VmError::SerializeJson)
1854         } else {
1855             // Update VmConfig by adding the new device.
1856             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1857             add_to_config(&mut config.net, net_cfg);
1858             Ok(None)
1859         }
1860     }
1861 
1862     fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1863         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1864 
1865         {
1866             // Validate the configuration change in a cloned configuration
1867             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1868             add_to_config(&mut config.vdpa, vdpa_cfg.clone());
1869             config.validate().map_err(VmError::ConfigValidation)?;
1870         }
1871 
1872         if let Some(ref mut vm) = self.vm {
1873             let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
1874                 error!("Error when adding new vDPA device to the VM: {:?}", e);
1875                 e
1876             })?;
1877             serde_json::to_vec(&info)
1878                 .map(Some)
1879                 .map_err(VmError::SerializeJson)
1880         } else {
1881             // Update VmConfig by adding the new device.
1882             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1883             add_to_config(&mut config.vdpa, vdpa_cfg);
1884             Ok(None)
1885         }
1886     }
1887 
1888     fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
1889         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
1890 
1891         {
1892             // Validate the configuration change in a cloned configuration
1893             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
1894 
1895             if config.vsock.is_some() {
1896                 return Err(VmError::TooManyVsockDevices);
1897             }
1898 
1899             config.vsock = Some(vsock_cfg.clone());
1900             config.validate().map_err(VmError::ConfigValidation)?;
1901         }
1902 
1903         if let Some(ref mut vm) = self.vm {
1904             let info = vm.add_vsock(vsock_cfg).map_err(|e| {
1905                 error!("Error when adding new vsock device to the VM: {:?}", e);
1906                 e
1907             })?;
1908             serde_json::to_vec(&info)
1909                 .map(Some)
1910                 .map_err(VmError::SerializeJson)
1911         } else {
1912             // Update VmConfig by adding the new device.
1913             let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
1914             config.vsock = Some(vsock_cfg);
1915             Ok(None)
1916         }
1917     }
1918 
1919     fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
1920         if let Some(ref mut vm) = self.vm {
1921             let info = vm.counters().map_err(|e| {
1922                 error!("Error when getting counters from the VM: {:?}", e);
1923                 e
1924             })?;
1925             serde_json::to_vec(&info)
1926                 .map(Some)
1927                 .map_err(VmError::SerializeJson)
1928         } else {
1929             Err(VmError::VmNotRunning)
1930         }
1931     }
1932 
1933     fn vm_power_button(&mut self) -> result::Result<(), VmError> {
1934         if let Some(ref mut vm) = self.vm {
1935             vm.power_button()
1936         } else {
1937             Err(VmError::VmNotRunning)
1938         }
1939     }
1940 
1941     fn vm_nmi(&mut self) -> result::Result<(), VmError> {
1942         if let Some(ref mut vm) = self.vm {
1943             vm.nmi()
1944         } else {
1945             Err(VmError::VmNotRunning)
1946         }
1947     }
1948 
1949     fn vm_receive_migration(
1950         &mut self,
1951         receive_data_migration: VmReceiveMigrationData,
1952     ) -> result::Result<(), MigratableError> {
1953         info!(
1954             "Receiving migration: receiver_url = {}",
1955             receive_data_migration.receiver_url
1956         );
1957 
1958         let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
1959         let listener = UnixListener::bind(&path).map_err(|e| {
1960             MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
1961         })?;
1962         let (mut socket, _addr) = listener.accept().map_err(|e| {
1963             MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
1964         })?;
1965         std::fs::remove_file(&path).map_err(|e| {
1966             MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
1967         })?;
1968 
1969         let mut started = false;
1970         let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
1971         let mut existing_memory_files = None;
1972         loop {
1973             let req = Request::read_from(&mut socket)?;
1974             match req.command() {
1975                 Command::Invalid => info!("Invalid Command Received"),
1976                 Command::Start => {
1977                     info!("Start Command Received");
1978                     started = true;
1979 
1980                     Response::ok().write_to(&mut socket)?;
1981                 }
1982                 Command::Config => {
1983                     info!("Config Command Received");
1984 
1985                     if !started {
1986                         warn!("Migration not started yet");
1987                         Response::error().write_to(&mut socket)?;
1988                         continue;
1989                     }
1990                     memory_manager = Some(self.vm_receive_config(
1991                         &req,
1992                         &mut socket,
1993                         existing_memory_files.take(),
1994                     )?);
1995                 }
1996                 Command::State => {
1997                     info!("State Command Received");
1998 
1999                     if !started {
2000                         warn!("Migration not started yet");
2001                         Response::error().write_to(&mut socket)?;
2002                         continue;
2003                     }
2004                     if let Some(mm) = memory_manager.take() {
2005                         self.vm_receive_state(&req, &mut socket, mm)?;
2006                     } else {
2007                         warn!("Configuration not sent yet");
2008                         Response::error().write_to(&mut socket)?;
2009                     }
2010                 }
2011                 Command::Memory => {
2012                     info!("Memory Command Received");
2013 
2014                     if !started {
2015                         warn!("Migration not started yet");
2016                         Response::error().write_to(&mut socket)?;
2017                         continue;
2018                     }
2019                     if let Some(mm) = memory_manager.as_ref() {
2020                         self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
2021                     } else {
2022                         warn!("Configuration not sent yet");
2023                         Response::error().write_to(&mut socket)?;
2024                     }
2025                 }
2026                 Command::MemoryFd => {
2027                     info!("MemoryFd Command Received");
2028 
2029                     if !started {
2030                         warn!("Migration not started yet");
2031                         Response::error().write_to(&mut socket)?;
2032                         continue;
2033                     }
2034 
2035                     let mut buf = [0u8; 4];
2036                     let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
2037                         MigratableError::MigrateReceive(anyhow!(
2038                             "Error receiving slot from socket: {}",
2039                             e
2040                         ))
2041                     })?;
2042 
2043                     if existing_memory_files.is_none() {
2044                         existing_memory_files = Some(HashMap::default())
2045                     }
2046 
2047                     if let Some(ref mut existing_memory_files) = existing_memory_files {
2048                         let slot = u32::from_le_bytes(buf);
2049                         existing_memory_files.insert(slot, file.unwrap());
2050                     }
2051 
2052                     Response::ok().write_to(&mut socket)?;
2053                 }
2054                 Command::Complete => {
2055                     info!("Complete Command Received");
2056                     if let Some(ref mut vm) = self.vm.as_mut() {
2057                         vm.resume()?;
2058                         Response::ok().write_to(&mut socket)?;
2059                     } else {
2060                         warn!("VM not created yet");
2061                         Response::error().write_to(&mut socket)?;
2062                     }
2063                     break;
2064                 }
2065                 Command::Abandon => {
2066                     info!("Abandon Command Received");
2067                     self.vm = None;
2068                     self.vm_config = None;
2069                     Response::ok().write_to(&mut socket).ok();
2070                     break;
2071                 }
2072             }
2073         }
2074 
2075         Ok(())
2076     }
2077 
2078     fn vm_send_migration(
2079         &mut self,
2080         send_data_migration: VmSendMigrationData,
2081     ) -> result::Result<(), MigratableError> {
2082         info!(
2083             "Sending migration: destination_url = {}, local = {}",
2084             send_data_migration.destination_url, send_data_migration.local
2085         );
2086 
2087         if !self
2088             .vm_config
2089             .as_ref()
2090             .unwrap()
2091             .lock()
2092             .unwrap()
2093             .backed_by_shared_memory()
2094             && send_data_migration.local
2095         {
2096             return Err(MigratableError::MigrateSend(anyhow!(
2097                 "Local migration requires shared memory or hugepages enabled"
2098             )));
2099         }
2100 
2101         if let Some(vm) = self.vm.as_mut() {
2102             Self::send_migration(
2103                 vm,
2104                 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2105                 self.hypervisor.clone(),
2106                 send_data_migration,
2107             )
2108             .map_err(|migration_err| {
2109                 error!("Migration failed: {:?}", migration_err);
2110 
2111                 // Stop logging dirty pages
2112                 if let Err(e) = vm.stop_dirty_log() {
2113                     return e;
2114                 }
2115 
2116                 if vm.get_state().unwrap() == VmState::Paused {
2117                     if let Err(e) = vm.resume() {
2118                         return e;
2119                     }
2120                 }
2121 
2122                 migration_err
2123             })?;
2124 
2125             // Shutdown the VM after the migration succeeded
2126             self.exit_evt.write(1).map_err(|e| {
2127                 MigratableError::MigrateSend(anyhow!(
2128                     "Failed shutting down the VM after migration: {:?}",
2129                     e
2130                 ))
2131             })
2132         } else {
2133             Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
2134         }
2135     }
2136 }
2137 
// String identifiers for the CPU, memory and device manager snapshots.
// NOTE(review): their users are not visible in this part of the file;
// presumably they key each manager's section inside a VM snapshot — confirm.
const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";
2141 
2142 #[cfg(test)]
2143 mod unit_tests {
2144     use super::*;
2145     #[cfg(target_arch = "x86_64")]
2146     use crate::config::DebugConsoleConfig;
2147     use config::{
2148         ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig,
2149         RngConfig,
2150     };
2151 
2152     fn create_dummy_vmm() -> Vmm {
2153         Vmm::new(
2154             VmmVersionInfo::new("dummy", "dummy"),
2155             EventFd::new(EFD_NONBLOCK).unwrap(),
2156             #[cfg(feature = "guest_debug")]
2157             EventFd::new(EFD_NONBLOCK).unwrap(),
2158             #[cfg(feature = "guest_debug")]
2159             EventFd::new(EFD_NONBLOCK).unwrap(),
2160             SeccompAction::Allow,
2161             hypervisor::new().unwrap(),
2162             EventFd::new(EFD_NONBLOCK).unwrap(),
2163         )
2164         .unwrap()
2165     }
2166 
2167     fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> {
2168         Arc::new(Mutex::new(VmConfig {
2169             cpus: CpusConfig {
2170                 boot_vcpus: 1,
2171                 max_vcpus: 1,
2172                 topology: None,
2173                 kvm_hyperv: false,
2174                 max_phys_bits: 46,
2175                 affinity: None,
2176                 features: config::CpuFeatures::default(),
2177             },
2178             memory: MemoryConfig {
2179                 size: 536_870_912,
2180                 mergeable: false,
2181                 hotplug_method: HotplugMethod::Acpi,
2182                 hotplug_size: None,
2183                 hotplugged_size: None,
2184                 shared: true,
2185                 hugepages: false,
2186                 hugepage_size: None,
2187                 prefault: false,
2188                 zones: None,
2189                 thp: true,
2190             },
2191             payload: Some(PayloadConfig {
2192                 kernel: Some(PathBuf::from("/path/to/kernel")),
2193                 firmware: None,
2194                 cmdline: None,
2195                 initramfs: None,
2196                 #[cfg(feature = "igvm")]
2197                 igvm: None,
2198                 #[cfg(feature = "sev_snp")]
2199                 host_data: None,
2200             }),
2201             rate_limit_groups: None,
2202             disks: None,
2203             net: None,
2204             rng: RngConfig {
2205                 src: PathBuf::from("/dev/urandom"),
2206                 iommu: false,
2207             },
2208             balloon: None,
2209             fs: None,
2210             pmem: None,
2211             serial: ConsoleConfig {
2212                 file: None,
2213                 mode: ConsoleOutputMode::Null,
2214                 iommu: false,
2215                 socket: None,
2216             },
2217             console: ConsoleConfig {
2218                 file: None,
2219                 mode: ConsoleOutputMode::Tty,
2220                 iommu: false,
2221                 socket: None,
2222             },
2223             #[cfg(target_arch = "x86_64")]
2224             debug_console: DebugConsoleConfig::default(),
2225             devices: None,
2226             user_devices: None,
2227             vdpa: None,
2228             vsock: None,
2229             #[cfg(feature = "pvmemcontrol")]
2230             pvmemcontrol: None,
2231             pvpanic: false,
2232             iommu: false,
2233             #[cfg(target_arch = "x86_64")]
2234             sgx_epc: None,
2235             numa: None,
2236             watchdog: false,
2237             #[cfg(feature = "guest_debug")]
2238             gdb: false,
2239             pci_segments: None,
2240             platform: None,
2241             tpm: None,
2242             preserved_fds: None,
2243             landlock_enable: false,
2244             landlock_rules: None,
2245         }))
2246     }
2247 
2248     #[test]
2249     fn test_vmm_vm_create() {
2250         let mut vmm = create_dummy_vmm();
2251         let config = create_dummy_vm_config();
2252 
2253         assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
2254         assert!(matches!(
2255             vmm.vm_create(config),
2256             Err(VmError::VmAlreadyCreated)
2257         ));
2258     }
2259 
2260     #[test]
2261     fn test_vmm_vm_cold_add_device() {
2262         let mut vmm = create_dummy_vmm();
2263         let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();
2264 
2265         assert!(matches!(
2266             vmm.vm_add_device(device_config.clone()),
2267             Err(VmError::VmNotCreated)
2268         ));
2269 
2270         let _ = vmm.vm_create(create_dummy_vm_config());
2271         assert!(vmm
2272             .vm_config
2273             .as_ref()
2274             .unwrap()
2275             .lock()
2276             .unwrap()
2277             .devices
2278             .is_none());
2279 
2280         let result = vmm.vm_add_device(device_config.clone());
2281         assert!(result.is_ok());
2282         assert!(result.unwrap().is_none());
2283         assert_eq!(
2284             vmm.vm_config
2285                 .as_ref()
2286                 .unwrap()
2287                 .lock()
2288                 .unwrap()
2289                 .devices
2290                 .clone()
2291                 .unwrap()
2292                 .len(),
2293             1
2294         );
2295         assert_eq!(
2296             vmm.vm_config
2297                 .as_ref()
2298                 .unwrap()
2299                 .lock()
2300                 .unwrap()
2301                 .devices
2302                 .clone()
2303                 .unwrap()[0],
2304             device_config
2305         );
2306     }
2307 
2308     #[test]
2309     fn test_vmm_vm_cold_add_user_device() {
2310         let mut vmm = create_dummy_vmm();
2311         let user_device_config =
2312             UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();
2313 
2314         assert!(matches!(
2315             vmm.vm_add_user_device(user_device_config.clone()),
2316             Err(VmError::VmNotCreated)
2317         ));
2318 
2319         let _ = vmm.vm_create(create_dummy_vm_config());
2320         assert!(vmm
2321             .vm_config
2322             .as_ref()
2323             .unwrap()
2324             .lock()
2325             .unwrap()
2326             .user_devices
2327             .is_none());
2328 
2329         let result = vmm.vm_add_user_device(user_device_config.clone());
2330         assert!(result.is_ok());
2331         assert!(result.unwrap().is_none());
2332         assert_eq!(
2333             vmm.vm_config
2334                 .as_ref()
2335                 .unwrap()
2336                 .lock()
2337                 .unwrap()
2338                 .user_devices
2339                 .clone()
2340                 .unwrap()
2341                 .len(),
2342             1
2343         );
2344         assert_eq!(
2345             vmm.vm_config
2346                 .as_ref()
2347                 .unwrap()
2348                 .lock()
2349                 .unwrap()
2350                 .user_devices
2351                 .clone()
2352                 .unwrap()[0],
2353             user_device_config
2354         );
2355     }
2356 
2357     #[test]
2358     fn test_vmm_vm_cold_add_disk() {
2359         let mut vmm = create_dummy_vmm();
2360         let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();
2361 
2362         assert!(matches!(
2363             vmm.vm_add_disk(disk_config.clone()),
2364             Err(VmError::VmNotCreated)
2365         ));
2366 
2367         let _ = vmm.vm_create(create_dummy_vm_config());
2368         assert!(vmm
2369             .vm_config
2370             .as_ref()
2371             .unwrap()
2372             .lock()
2373             .unwrap()
2374             .disks
2375             .is_none());
2376 
2377         let result = vmm.vm_add_disk(disk_config.clone());
2378         assert!(result.is_ok());
2379         assert!(result.unwrap().is_none());
2380         assert_eq!(
2381             vmm.vm_config
2382                 .as_ref()
2383                 .unwrap()
2384                 .lock()
2385                 .unwrap()
2386                 .disks
2387                 .clone()
2388                 .unwrap()
2389                 .len(),
2390             1
2391         );
2392         assert_eq!(
2393             vmm.vm_config
2394                 .as_ref()
2395                 .unwrap()
2396                 .lock()
2397                 .unwrap()
2398                 .disks
2399                 .clone()
2400                 .unwrap()[0],
2401             disk_config
2402         );
2403     }
2404 
2405     #[test]
2406     fn test_vmm_vm_cold_add_fs() {
2407         let mut vmm = create_dummy_vmm();
2408         let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();
2409 
2410         assert!(matches!(
2411             vmm.vm_add_fs(fs_config.clone()),
2412             Err(VmError::VmNotCreated)
2413         ));
2414 
2415         let _ = vmm.vm_create(create_dummy_vm_config());
2416         assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());
2417 
2418         let result = vmm.vm_add_fs(fs_config.clone());
2419         assert!(result.is_ok());
2420         assert!(result.unwrap().is_none());
2421         assert_eq!(
2422             vmm.vm_config
2423                 .as_ref()
2424                 .unwrap()
2425                 .lock()
2426                 .unwrap()
2427                 .fs
2428                 .clone()
2429                 .unwrap()
2430                 .len(),
2431             1
2432         );
2433         assert_eq!(
2434             vmm.vm_config
2435                 .as_ref()
2436                 .unwrap()
2437                 .lock()
2438                 .unwrap()
2439                 .fs
2440                 .clone()
2441                 .unwrap()[0],
2442             fs_config
2443         );
2444     }
2445 
2446     #[test]
2447     fn test_vmm_vm_cold_add_pmem() {
2448         let mut vmm = create_dummy_vmm();
2449         let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();
2450 
2451         assert!(matches!(
2452             vmm.vm_add_pmem(pmem_config.clone()),
2453             Err(VmError::VmNotCreated)
2454         ));
2455 
2456         let _ = vmm.vm_create(create_dummy_vm_config());
2457         assert!(vmm
2458             .vm_config
2459             .as_ref()
2460             .unwrap()
2461             .lock()
2462             .unwrap()
2463             .pmem
2464             .is_none());
2465 
2466         let result = vmm.vm_add_pmem(pmem_config.clone());
2467         assert!(result.is_ok());
2468         assert!(result.unwrap().is_none());
2469         assert_eq!(
2470             vmm.vm_config
2471                 .as_ref()
2472                 .unwrap()
2473                 .lock()
2474                 .unwrap()
2475                 .pmem
2476                 .clone()
2477                 .unwrap()
2478                 .len(),
2479             1
2480         );
2481         assert_eq!(
2482             vmm.vm_config
2483                 .as_ref()
2484                 .unwrap()
2485                 .lock()
2486                 .unwrap()
2487                 .pmem
2488                 .clone()
2489                 .unwrap()[0],
2490             pmem_config
2491         );
2492     }
2493 
2494     #[test]
2495     fn test_vmm_vm_cold_add_net() {
2496         let mut vmm = create_dummy_vmm();
2497         let net_config = NetConfig::parse(
2498             "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
2499         )
2500         .unwrap();
2501 
2502         assert!(matches!(
2503             vmm.vm_add_net(net_config.clone()),
2504             Err(VmError::VmNotCreated)
2505         ));
2506 
2507         let _ = vmm.vm_create(create_dummy_vm_config());
2508         assert!(vmm
2509             .vm_config
2510             .as_ref()
2511             .unwrap()
2512             .lock()
2513             .unwrap()
2514             .net
2515             .is_none());
2516 
2517         let result = vmm.vm_add_net(net_config.clone());
2518         assert!(result.is_ok());
2519         assert!(result.unwrap().is_none());
2520         assert_eq!(
2521             vmm.vm_config
2522                 .as_ref()
2523                 .unwrap()
2524                 .lock()
2525                 .unwrap()
2526                 .net
2527                 .clone()
2528                 .unwrap()
2529                 .len(),
2530             1
2531         );
2532         assert_eq!(
2533             vmm.vm_config
2534                 .as_ref()
2535                 .unwrap()
2536                 .lock()
2537                 .unwrap()
2538                 .net
2539                 .clone()
2540                 .unwrap()[0],
2541             net_config
2542         );
2543     }
2544 
2545     #[test]
2546     fn test_vmm_vm_cold_add_vdpa() {
2547         let mut vmm = create_dummy_vmm();
2548         let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();
2549 
2550         assert!(matches!(
2551             vmm.vm_add_vdpa(vdpa_config.clone()),
2552             Err(VmError::VmNotCreated)
2553         ));
2554 
2555         let _ = vmm.vm_create(create_dummy_vm_config());
2556         assert!(vmm
2557             .vm_config
2558             .as_ref()
2559             .unwrap()
2560             .lock()
2561             .unwrap()
2562             .vdpa
2563             .is_none());
2564 
2565         let result = vmm.vm_add_vdpa(vdpa_config.clone());
2566         assert!(result.is_ok());
2567         assert!(result.unwrap().is_none());
2568         assert_eq!(
2569             vmm.vm_config
2570                 .as_ref()
2571                 .unwrap()
2572                 .lock()
2573                 .unwrap()
2574                 .vdpa
2575                 .clone()
2576                 .unwrap()
2577                 .len(),
2578             1
2579         );
2580         assert_eq!(
2581             vmm.vm_config
2582                 .as_ref()
2583                 .unwrap()
2584                 .lock()
2585                 .unwrap()
2586                 .vdpa
2587                 .clone()
2588                 .unwrap()[0],
2589             vdpa_config
2590         );
2591     }
2592 
2593     #[test]
2594     fn test_vmm_vm_cold_add_vsock() {
2595         let mut vmm = create_dummy_vmm();
2596         let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();
2597 
2598         assert!(matches!(
2599             vmm.vm_add_vsock(vsock_config.clone()),
2600             Err(VmError::VmNotCreated)
2601         ));
2602 
2603         let _ = vmm.vm_create(create_dummy_vm_config());
2604         assert!(vmm
2605             .vm_config
2606             .as_ref()
2607             .unwrap()
2608             .lock()
2609             .unwrap()
2610             .vsock
2611             .is_none());
2612 
2613         let result = vmm.vm_add_vsock(vsock_config.clone());
2614         assert!(result.is_ok());
2615         assert!(result.unwrap().is_none());
2616         assert_eq!(
2617             vmm.vm_config
2618                 .as_ref()
2619                 .unwrap()
2620                 .lock()
2621                 .unwrap()
2622                 .vsock
2623                 .clone()
2624                 .unwrap(),
2625             vsock_config
2626         );
2627     }
2628 }
2629