1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use crate::api::{ 12 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 13 VmSendMigrationData, VmmPingResponse, 14 }; 15 use crate::config::{ 16 add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, 17 UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, 18 }; 19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 20 use crate::coredump::GuestDebuggable; 21 use crate::landlock::Landlock; 22 use crate::memory_manager::MemoryManager; 23 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 24 use crate::migration::get_vm_snapshot; 25 use crate::migration::{recv_vm_config, recv_vm_state}; 26 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 27 use crate::vm::{Error as VmError, Vm, VmState}; 28 use anyhow::anyhow; 29 #[cfg(feature = "dbus_api")] 30 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 31 use api::http::HttpApiHandle; 32 use console_devices::{pre_create_console_devices, ConsoleInfo}; 33 use landlock::LandlockError; 34 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 35 use memory_manager::MemoryManagerSnapshotData; 36 use pci::PciBdf; 37 use seccompiler::{apply_filter, SeccompAction}; 38 use serde::ser::{SerializeStruct, Serializer}; 39 use serde::{Deserialize, Serialize}; 40 use signal_hook::iterator::{Handle, Signals}; 41 use std::collections::HashMap; 42 use std::fs::File; 43 use std::io; 44 use std::io::{stdout, Read, Write}; 45 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 46 use std::os::unix::net::UnixListener; 47 use std::os::unix::net::UnixStream; 48 use std::panic::AssertUnwindSafe; 49 use std::path::PathBuf; 50 use std::rc::Rc; 51 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 52 use std::sync::{Arc, Mutex}; 53 use std::time::Instant; 54 use std::{result, thread}; 55 use thiserror::Error; 56 use tracer::trace_scoped; 57 use vm_memory::bitmap::AtomicBitmap; 58 use vm_memory::{ReadVolatile, WriteVolatile}; 59 use vm_migration::{protocol::*, Migratable}; 60 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 61 use vmm_sys_util::eventfd::EventFd; 62 use vmm_sys_util::signal::unblock_signal; 63 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 64 65 mod acpi; 66 pub mod api; 67 mod clone3; 68 pub mod config; 69 pub mod console_devices; 70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 71 mod coredump; 72 pub mod cpu; 73 pub mod device_manager; 74 pub mod device_tree; 75 #[cfg(feature = "guest_debug")] 76 mod gdb; 77 #[cfg(feature = "igvm")] 78 mod igvm; 79 pub mod interrupt; 80 pub mod landlock; 81 pub mod memory_manager; 82 pub mod migration; 83 mod pci_segment; 84 pub mod seccomp_filters; 85 mod serial_manager; 86 mod sigwinch_listener; 87 pub mod vm; 88 pub mod vm_config; 89 90 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 91 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 92 93 /// Errors associated with VMM management 94 #[derive(Debug, Error)] 95 pub enum Error { 96 /// API request receive error 97 #[error("Error receiving API request: {0}")] 98 ApiRequestRecv(#[source] RecvError), 99 100 /// API response send error 101 #[error("Error sending API request: {0}")] 102 ApiResponseSend(#[source] SendError<ApiResponse>), 103 104 /// Cannot bind to the UNIX domain socket path 105 #[error("Error binding to UNIX domain socket: {0}")] 106 Bind(#[source] io::Error), 107 108 /// Cannot clone EventFd. 109 #[error("Error cloning EventFd: {0}")] 110 EventFdClone(#[source] io::Error), 111 112 /// Cannot create EventFd. 113 #[error("Error creating EventFd: {0}")] 114 EventFdCreate(#[source] io::Error), 115 116 /// Cannot read from EventFd. 117 #[error("Error reading from EventFd: {0}")] 118 EventFdRead(#[source] io::Error), 119 120 /// Cannot create epoll context. 121 #[error("Error creating epoll context: {0}")] 122 Epoll(#[source] io::Error), 123 124 /// Cannot create HTTP thread 125 #[error("Error spawning HTTP thread: {0}")] 126 HttpThreadSpawn(#[source] io::Error), 127 128 /// Cannot create D-Bus thread 129 #[cfg(feature = "dbus_api")] 130 #[error("Error spawning D-Bus thread: {0}")] 131 DBusThreadSpawn(#[source] io::Error), 132 133 /// Cannot start D-Bus session 134 #[cfg(feature = "dbus_api")] 135 #[error("Error starting D-Bus session: {0}")] 136 CreateDBusSession(#[source] zbus::Error), 137 138 /// Cannot create `event-monitor` thread 139 #[error("Error spawning `event-monitor` thread: {0}")] 140 EventMonitorThreadSpawn(#[source] io::Error), 141 142 /// Cannot handle the VM STDIN stream 143 #[error("Error handling VM stdin: {0:?}")] 144 Stdin(VmError), 145 146 /// Cannot handle the VM pty stream 147 #[error("Error handling VM pty: {0:?}")] 148 Pty(VmError), 149 150 /// Cannot reboot the VM 151 #[error("Error rebooting VM: {0:?}")] 152 VmReboot(VmError), 153 154 /// Cannot create VMM thread 155 #[error("Error spawning VMM thread {0:?}")] 156 VmmThreadSpawn(#[source] io::Error), 157 158 /// Cannot shut the VMM down 159 #[error("Error shutting down VMM: {0:?}")] 160 VmmShutdown(VmError), 161 162 /// Cannot create seccomp filter 163 #[error("Error creating seccomp filter: {0}")] 164 CreateSeccompFilter(seccompiler::Error), 165 166 /// Cannot apply seccomp filter 167 #[error("Error applying seccomp filter: {0}")] 168 ApplySeccompFilter(seccompiler::Error), 169 170 /// Error activating virtio devices 171 #[error("Error activating virtio devices: {0:?}")] 172 ActivateVirtioDevices(VmError), 173 174 /// Error creating API server 175 #[error("Error creating API server {0:?}")] 176 CreateApiServer(micro_http::ServerError), 177 178 /// Error binding API server socket 179 #[error("Error creation API server's socket {0:?}")] 180 CreateApiServerSocket(#[source] io::Error), 181 182 #[cfg(feature = "guest_debug")] 183 #[error("Failed to start the GDB thread: {0}")] 184 GdbThreadSpawn(io::Error), 185 186 /// GDB request receive error 187 #[cfg(feature = "guest_debug")] 188 #[error("Error receiving GDB request: {0}")] 189 GdbRequestRecv(#[source] RecvError), 190 191 /// GDB response send error 192 #[cfg(feature = "guest_debug")] 193 #[error("Error sending GDB request: {0}")] 194 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 195 196 #[error("Cannot spawn a signal handler thread: {0}")] 197 SignalHandlerSpawn(#[source] io::Error), 198 199 #[error("Failed to join on threads: {0:?}")] 200 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 201 202 /// Cannot create Landlock object 203 #[error("Error creating landlock object: {0}")] 204 CreateLandlock(LandlockError), 205 206 /// Cannot apply landlock based sandboxing 207 #[error("Error applying landlock: {0}")] 208 ApplyLandlock(LandlockError), 209 } 210 pub type Result<T> = result::Result<T, Error>; 211 212 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 213 #[repr(u64)] 214 pub enum EpollDispatch { 215 Exit = 0, 216 Reset = 1, 217 Api = 2, 218 ActivateVirtioDevices = 3, 219 Debug = 4, 220 Unknown, 221 } 222 223 impl From<u64> for EpollDispatch { 224 fn from(v: u64) -> Self { 225 use EpollDispatch::*; 226 match v { 227 0 => Exit, 228 1 => Reset, 229 2 => Api, 230 3 => ActivateVirtioDevices, 231 4 => Debug, 232 _ => Unknown, 233 } 234 } 235 } 236 237 pub struct EpollContext { 238 epoll_file: File, 239 } 240 241 impl EpollContext { 242 pub fn new() -> result::Result<EpollContext, io::Error> { 243 let epoll_fd = epoll::create(true)?; 244 // Use 'File' to enforce closing on 'epoll_fd' 245 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 246 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 247 248 Ok(EpollContext { epoll_file }) 249 } 250 251 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 252 where 253 T: AsRawFd, 254 { 255 let dispatch_index = token as u64; 256 epoll::ctl( 257 self.epoll_file.as_raw_fd(), 258 epoll::ControlOptions::EPOLL_CTL_ADD, 259 fd.as_raw_fd(), 260 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 261 )?; 262 263 Ok(()) 264 } 265 266 #[cfg(fuzzing)] 267 pub fn add_event_custom<T>( 268 &mut self, 269 fd: &T, 270 id: u64, 271 evts: epoll::Events, 272 ) -> result::Result<(), io::Error> 273 where 274 T: AsRawFd, 275 { 276 epoll::ctl( 277 self.epoll_file.as_raw_fd(), 278 epoll::ControlOptions::EPOLL_CTL_ADD, 279 fd.as_raw_fd(), 280 epoll::Event::new(evts, id), 281 )?; 282 283 Ok(()) 284 } 285 } 286 287 impl AsRawFd for EpollContext { 288 fn as_raw_fd(&self) -> RawFd { 289 self.epoll_file.as_raw_fd() 290 } 291 } 292 293 pub struct PciDeviceInfo { 294 pub id: String, 295 pub bdf: PciBdf, 296 } 297 298 impl Serialize for PciDeviceInfo { 299 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 300 where 301 S: Serializer, 302 { 303 let bdf_str = self.bdf.to_string(); 304 305 // Serialize the structure. 306 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 307 state.serialize_field("id", &self.id)?; 308 state.serialize_field("bdf", &bdf_str)?; 309 state.end() 310 } 311 } 312 313 pub fn feature_list() -> Vec<String> { 314 vec![ 315 #[cfg(feature = "dbus_api")] 316 "dbus_api".to_string(), 317 #[cfg(feature = "dhat-heap")] 318 "dhat-heap".to_string(), 319 #[cfg(feature = "guest_debug")] 320 "guest_debug".to_string(), 321 #[cfg(feature = "igvm")] 322 "igvm".to_string(), 323 #[cfg(feature = "io_uring")] 324 "io_uring".to_string(), 325 #[cfg(feature = "kvm")] 326 "kvm".to_string(), 327 #[cfg(feature = "mshv")] 328 "mshv".to_string(), 329 #[cfg(feature = "sev_snp")] 330 "sev_snp".to_string(), 331 #[cfg(feature = "tdx")] 332 "tdx".to_string(), 333 #[cfg(feature = "tracing")] 334 "tracing".to_string(), 335 ] 336 } 337 338 pub fn start_event_monitor_thread( 339 mut monitor: event_monitor::Monitor, 340 seccomp_action: &SeccompAction, 341 landlock_enable: bool, 342 hypervisor_type: hypervisor::HypervisorType, 343 exit_event: EventFd, 344 ) -> Result<thread::JoinHandle<Result<()>>> { 345 // Retrieve seccomp filter 346 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 347 .map_err(Error::CreateSeccompFilter)?; 348 349 thread::Builder::new() 350 .name("event-monitor".to_owned()) 351 .spawn(move || { 352 // Apply seccomp filter 353 if !seccomp_filter.is_empty() { 354 apply_filter(&seccomp_filter) 355 .map_err(Error::ApplySeccompFilter) 356 .map_err(|e| { 357 error!("Error applying seccomp filter: {:?}", e); 358 exit_event.write(1).ok(); 359 e 360 })?; 361 } 362 if landlock_enable { 363 Landlock::new() 364 .map_err(Error::CreateLandlock)? 365 .restrict_self() 366 .map_err(Error::ApplyLandlock) 367 .map_err(|e| { 368 error!("Error applying landlock to event monitor thread: {:?}", e); 369 exit_event.write(1).ok(); 370 e 371 })?; 372 } 373 374 std::panic::catch_unwind(AssertUnwindSafe(move || { 375 while let Ok(event) = monitor.rx.recv() { 376 let event = Arc::new(event); 377 378 if let Some(ref mut file) = monitor.file { 379 file.write_all(event.as_bytes().as_ref()).ok(); 380 file.write_all(b"\n\n").ok(); 381 } 382 383 for tx in monitor.broadcast.iter() { 384 tx.send(event.clone()).ok(); 385 } 386 } 387 })) 388 .map_err(|_| { 389 error!("`event-monitor` thread panicked"); 390 exit_event.write(1).ok(); 391 }) 392 .ok(); 393 394 Ok(()) 395 }) 396 .map_err(Error::EventMonitorThreadSpawn) 397 } 398 399 #[allow(unused_variables)] 400 #[allow(clippy::too_many_arguments)] 401 pub fn start_vmm_thread( 402 vmm_version: VmmVersionInfo, 403 http_path: &Option<String>, 404 http_fd: Option<RawFd>, 405 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 406 api_event: EventFd, 407 api_sender: Sender<ApiRequest>, 408 api_receiver: Receiver<ApiRequest>, 409 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 410 #[cfg(feature = "guest_debug")] debug_event: EventFd, 411 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 412 exit_event: EventFd, 413 seccomp_action: &SeccompAction, 414 hypervisor: Arc<dyn hypervisor::Hypervisor>, 415 landlock_enable: bool, 416 ) -> Result<VmmThreadHandle> { 417 #[cfg(feature = "guest_debug")] 418 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 419 #[cfg(feature = "guest_debug")] 420 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 421 #[cfg(feature = "guest_debug")] 422 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 423 #[cfg(feature = "guest_debug")] 424 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 425 426 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 427 let hypervisor_type = hypervisor.hypervisor_type(); 428 429 // Retrieve seccomp filter 430 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 431 .map_err(Error::CreateSeccompFilter)?; 432 433 let vmm_seccomp_action = seccomp_action.clone(); 434 let thread = { 435 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 436 thread::Builder::new() 437 .name("vmm".to_string()) 438 .spawn(move || { 439 // Apply seccomp filter for VMM thread. 440 if !vmm_seccomp_filter.is_empty() { 441 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 442 } 443 444 let mut vmm = Vmm::new( 445 vmm_version, 446 api_event, 447 #[cfg(feature = "guest_debug")] 448 debug_event, 449 #[cfg(feature = "guest_debug")] 450 vm_debug_event, 451 vmm_seccomp_action, 452 hypervisor, 453 exit_event, 454 )?; 455 456 vmm.setup_signal_handler(landlock_enable)?; 457 458 vmm.control_loop( 459 Rc::new(api_receiver), 460 #[cfg(feature = "guest_debug")] 461 Rc::new(gdb_receiver), 462 ) 463 }) 464 .map_err(Error::VmmThreadSpawn)? 465 }; 466 467 // The VMM thread is started, we can start the dbus thread 468 // and start serving HTTP requests 469 #[cfg(feature = "dbus_api")] 470 let dbus_shutdown_chs = match dbus_options { 471 Some(opts) => { 472 let (_, chs) = api::start_dbus_thread( 473 opts, 474 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 475 api_sender.clone(), 476 seccomp_action, 477 exit_event.try_clone().map_err(Error::EventFdClone)?, 478 hypervisor_type, 479 )?; 480 Some(chs) 481 } 482 None => None, 483 }; 484 485 let http_api_handle = if let Some(http_path) = http_path { 486 Some(api::start_http_path_thread( 487 http_path, 488 api_event_clone, 489 api_sender, 490 seccomp_action, 491 exit_event, 492 hypervisor_type, 493 landlock_enable, 494 )?) 495 } else if let Some(http_fd) = http_fd { 496 Some(api::start_http_fd_thread( 497 http_fd, 498 api_event_clone, 499 api_sender, 500 seccomp_action, 501 exit_event, 502 hypervisor_type, 503 landlock_enable, 504 )?) 505 } else { 506 None 507 }; 508 509 #[cfg(feature = "guest_debug")] 510 if let Some(debug_path) = debug_path { 511 let target = gdb::GdbStub::new( 512 gdb_sender, 513 gdb_debug_event, 514 gdb_vm_debug_event, 515 gdb_hw_breakpoints, 516 ); 517 thread::Builder::new() 518 .name("gdb".to_owned()) 519 .spawn(move || gdb::gdb_thread(target, &debug_path)) 520 .map_err(Error::GdbThreadSpawn)?; 521 } 522 523 Ok(VmmThreadHandle { 524 thread_handle: thread, 525 #[cfg(feature = "dbus_api")] 526 dbus_shutdown_chs, 527 http_api_handle, 528 }) 529 } 530 531 #[derive(Clone, Deserialize, Serialize)] 532 struct VmMigrationConfig { 533 vm_config: Arc<Mutex<VmConfig>>, 534 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 535 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 536 memory_manager_data: MemoryManagerSnapshotData, 537 } 538 539 #[derive(Debug, Clone)] 540 pub struct VmmVersionInfo { 541 pub build_version: String, 542 pub version: String, 543 } 544 545 impl VmmVersionInfo { 546 pub fn new(build_version: &str, version: &str) -> Self { 547 Self { 548 build_version: build_version.to_owned(), 549 version: version.to_owned(), 550 } 551 } 552 } 553 554 pub struct VmmThreadHandle { 555 pub thread_handle: thread::JoinHandle<Result<()>>, 556 #[cfg(feature = "dbus_api")] 557 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 558 pub http_api_handle: Option<HttpApiHandle>, 559 } 560 561 pub struct Vmm { 562 epoll: EpollContext, 563 exit_evt: EventFd, 564 reset_evt: EventFd, 565 api_evt: EventFd, 566 #[cfg(feature = "guest_debug")] 567 debug_evt: EventFd, 568 #[cfg(feature = "guest_debug")] 569 vm_debug_evt: EventFd, 570 version: VmmVersionInfo, 571 vm: Option<Vm>, 572 vm_config: Option<Arc<Mutex<VmConfig>>>, 573 seccomp_action: SeccompAction, 574 hypervisor: Arc<dyn hypervisor::Hypervisor>, 575 activate_evt: EventFd, 576 signals: Option<Handle>, 577 threads: Vec<thread::JoinHandle<()>>, 578 original_termios_opt: Arc<Mutex<Option<termios>>>, 579 console_resize_pipe: Option<File>, 580 console_info: Option<ConsoleInfo>, 581 } 582 583 impl Vmm { 584 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 585 586 fn signal_handler( 587 mut signals: Signals, 588 original_termios_opt: Arc<Mutex<Option<termios>>>, 589 exit_evt: &EventFd, 590 ) { 591 for sig in &Self::HANDLED_SIGNALS { 592 unblock_signal(*sig).unwrap(); 593 } 594 595 for signal in signals.forever() { 596 match signal { 597 SIGTERM | SIGINT => { 598 if exit_evt.write(1).is_err() { 599 // Resetting the terminal is usually done as the VMM exits 600 if let Ok(lock) = original_termios_opt.lock() { 601 if let Some(termios) = *lock { 602 // SAFETY: FFI call 603 let _ = unsafe { 604 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 605 }; 606 } 607 } else { 608 warn!("Failed to lock original termios"); 609 } 610 611 std::process::exit(1); 612 } 613 } 614 _ => (), 615 } 616 } 617 } 618 619 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 620 let signals = Signals::new(Self::HANDLED_SIGNALS); 621 match signals { 622 Ok(signals) => { 623 self.signals = Some(signals.handle()); 624 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 625 let original_termios_opt = Arc::clone(&self.original_termios_opt); 626 627 let signal_handler_seccomp_filter = get_seccomp_filter( 628 &self.seccomp_action, 629 Thread::SignalHandler, 630 self.hypervisor.hypervisor_type(), 631 ) 632 .map_err(Error::CreateSeccompFilter)?; 633 self.threads.push( 634 thread::Builder::new() 635 .name("vmm_signal_handler".to_string()) 636 .spawn(move || { 637 if !signal_handler_seccomp_filter.is_empty() { 638 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 639 .map_err(Error::ApplySeccompFilter) 640 { 641 error!("Error applying seccomp filter: {:?}", e); 642 exit_evt.write(1).ok(); 643 return; 644 } 645 } 646 if landlock_enable{ 647 match Landlock::new() { 648 Ok(landlock) => { 649 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 650 error!("Error applying Landlock to signal handler thread: {:?}", e); 651 exit_evt.write(1).ok(); 652 }); 653 } 654 Err(e) => { 655 error!("Error creating Landlock object: {:?}", e); 656 exit_evt.write(1).ok(); 657 } 658 }; 659 } 660 661 std::panic::catch_unwind(AssertUnwindSafe(|| { 662 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 663 })) 664 .map_err(|_| { 665 error!("vmm signal_handler thread panicked"); 666 exit_evt.write(1).ok() 667 }) 668 .ok(); 669 }) 670 .map_err(Error::SignalHandlerSpawn)?, 671 ); 672 } 673 Err(e) => error!("Signal not found {}", e), 674 } 675 Ok(()) 676 } 677 678 #[allow(clippy::too_many_arguments)] 679 fn new( 680 vmm_version: VmmVersionInfo, 681 api_evt: EventFd, 682 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 683 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 684 seccomp_action: SeccompAction, 685 hypervisor: Arc<dyn hypervisor::Hypervisor>, 686 exit_evt: EventFd, 687 ) -> Result<Self> { 688 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 689 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 690 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 691 692 epoll 693 .add_event(&exit_evt, EpollDispatch::Exit) 694 .map_err(Error::Epoll)?; 695 696 epoll 697 .add_event(&reset_evt, EpollDispatch::Reset) 698 .map_err(Error::Epoll)?; 699 700 epoll 701 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 702 .map_err(Error::Epoll)?; 703 704 epoll 705 .add_event(&api_evt, EpollDispatch::Api) 706 .map_err(Error::Epoll)?; 707 708 #[cfg(feature = "guest_debug")] 709 epoll 710 .add_event(&debug_evt, EpollDispatch::Debug) 711 .map_err(Error::Epoll)?; 712 713 Ok(Vmm { 714 epoll, 715 exit_evt, 716 reset_evt, 717 api_evt, 718 #[cfg(feature = "guest_debug")] 719 debug_evt, 720 #[cfg(feature = "guest_debug")] 721 vm_debug_evt, 722 version: vmm_version, 723 vm: None, 724 vm_config: None, 725 seccomp_action, 726 hypervisor, 727 activate_evt, 728 signals: None, 729 threads: vec![], 730 original_termios_opt: Arc::new(Mutex::new(None)), 731 console_resize_pipe: None, 732 console_info: None, 733 }) 734 } 735 736 fn vm_receive_config<T>( 737 &mut self, 738 req: &Request, 739 socket: &mut T, 740 existing_memory_files: Option<HashMap<u32, File>>, 741 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 742 where 743 T: Read + Write, 744 { 745 // Read in config data along with memory manager data 746 let mut data: Vec<u8> = Vec::new(); 747 data.resize_with(req.length() as usize, Default::default); 748 socket 749 .read_exact(&mut data) 750 .map_err(MigratableError::MigrateSocket)?; 751 752 let vm_migration_config: VmMigrationConfig = 753 serde_json::from_slice(&data).map_err(|e| { 754 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 755 })?; 756 757 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 758 self.vm_check_cpuid_compatibility( 759 &vm_migration_config.vm_config, 760 &vm_migration_config.common_cpuid, 761 )?; 762 763 let config = vm_migration_config.vm_config.clone(); 764 self.vm_config = Some(vm_migration_config.vm_config); 765 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 766 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 767 })?); 768 769 if self 770 .vm_config 771 .as_ref() 772 .unwrap() 773 .lock() 774 .unwrap() 775 .landlock_enable 776 { 777 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 778 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 779 })?; 780 } 781 782 let vm = Vm::create_hypervisor_vm( 783 &self.hypervisor, 784 #[cfg(feature = "tdx")] 785 false, 786 #[cfg(feature = "sev_snp")] 787 false, 788 ) 789 .map_err(|e| { 790 MigratableError::MigrateReceive(anyhow!( 791 "Error creating hypervisor VM from snapshot: {:?}", 792 e 793 )) 794 })?; 795 796 let phys_bits = 797 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 798 799 let memory_manager = MemoryManager::new( 800 vm, 801 &config.lock().unwrap().memory.clone(), 802 None, 803 phys_bits, 804 #[cfg(feature = "tdx")] 805 false, 806 Some(&vm_migration_config.memory_manager_data), 807 existing_memory_files, 808 #[cfg(target_arch = "x86_64")] 809 None, 810 ) 811 .map_err(|e| { 812 MigratableError::MigrateReceive(anyhow!( 813 "Error creating MemoryManager from snapshot: {:?}", 814 e 815 )) 816 })?; 817 818 Response::ok().write_to(socket)?; 819 820 Ok(memory_manager) 821 } 822 823 fn vm_receive_state<T>( 824 &mut self, 825 req: &Request, 826 socket: &mut T, 827 mm: Arc<Mutex<MemoryManager>>, 828 ) -> std::result::Result<(), MigratableError> 829 where 830 T: Read + Write, 831 { 832 // Read in state data 833 let mut data: Vec<u8> = Vec::new(); 834 data.resize_with(req.length() as usize, Default::default); 835 socket 836 .read_exact(&mut data) 837 .map_err(MigratableError::MigrateSocket)?; 838 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 839 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 840 })?; 841 842 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 843 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 844 })?; 845 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 846 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 847 })?; 848 #[cfg(feature = "guest_debug")] 849 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 850 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 851 })?; 852 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 853 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 854 })?; 855 856 let timestamp = Instant::now(); 857 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 858 let mut vm = Vm::new_from_memory_manager( 859 self.vm_config.clone().unwrap(), 860 mm, 861 hypervisor_vm, 862 exit_evt, 863 reset_evt, 864 #[cfg(feature = "guest_debug")] 865 debug_evt, 866 &self.seccomp_action, 867 self.hypervisor.clone(), 868 activate_evt, 869 timestamp, 870 self.console_info.clone(), 871 None, 872 Arc::clone(&self.original_termios_opt), 873 Some(snapshot), 874 ) 875 .map_err(|e| { 876 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 877 })?; 878 879 // Create VM 880 vm.restore().map_err(|e| { 881 Response::error().write_to(socket).ok(); 882 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 883 })?; 884 self.vm = Some(vm); 885 886 Response::ok().write_to(socket)?; 887 888 Ok(()) 889 } 890 891 fn vm_receive_memory<T>( 892 &mut self, 893 req: &Request, 894 socket: &mut T, 895 memory_manager: &mut MemoryManager, 896 ) -> std::result::Result<(), MigratableError> 897 where 898 T: Read + ReadVolatile + Write, 899 { 900 // Read table 901 let table = MemoryRangeTable::read_from(socket, req.length())?; 902 903 // And then read the memory itself 904 memory_manager 905 .receive_memory_regions(&table, socket) 906 .inspect_err(|_| { 907 Response::error().write_to(socket).ok(); 908 })?; 909 Response::ok().write_to(socket)?; 910 Ok(()) 911 } 912 913 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 914 url.strip_prefix("unix:") 915 .ok_or_else(|| { 916 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 917 }) 918 .map(|s| s.into()) 919 } 920 921 // Returns true if there were dirty pages to send 922 fn vm_maybe_send_dirty_pages<T>( 923 vm: &mut Vm, 924 socket: &mut T, 925 ) -> result::Result<bool, MigratableError> 926 where 927 T: Read + Write + WriteVolatile, 928 { 929 // Send (dirty) memory table 930 let table = vm.dirty_log()?; 931 932 // But if there are no regions go straight to pause 933 if table.regions().is_empty() { 934 return Ok(false); 935 } 936 937 Request::memory(table.length()).write_to(socket).unwrap(); 938 table.write_to(socket)?; 939 // And then the memory itself 940 vm.send_memory_regions(&table, socket)?; 941 Response::read_from(socket)?.ok_or_abandon( 942 socket, 943 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 944 )?; 945 946 Ok(true) 947 } 948 949 fn send_migration( 950 vm: &mut Vm, 951 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 952 dyn hypervisor::Hypervisor, 953 >, 954 send_data_migration: VmSendMigrationData, 955 ) -> result::Result<(), MigratableError> { 956 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 957 let mut socket = UnixStream::connect(path).map_err(|e| { 958 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 959 })?; 960 961 // Start the migration 962 Request::start().write_to(&mut socket)?; 963 Response::read_from(&mut socket)?.ok_or_abandon( 964 &mut socket, 965 MigratableError::MigrateSend(anyhow!("Error starting migration")), 966 )?; 967 968 // Send config 969 let vm_config = vm.get_config(); 970 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 971 let common_cpuid = { 972 #[cfg(feature = "tdx")] 973 if vm_config.lock().unwrap().is_tdx_enabled() { 974 return Err(MigratableError::MigrateSend(anyhow!( 975 "Live Migration is not supported when TDX is enabled" 976 ))); 977 }; 978 979 let amx = vm_config.lock().unwrap().cpus.features.amx; 980 let phys_bits = 981 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 982 arch::generate_common_cpuid( 983 &hypervisor, 984 &arch::CpuidConfig { 985 sgx_epc_sections: None, 986 phys_bits, 987 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 988 #[cfg(feature = "tdx")] 989 tdx: false, 990 amx, 991 }, 992 ) 993 .map_err(|e| { 994 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 995 })? 996 }; 997 998 if send_data_migration.local { 999 vm.send_memory_fds(&mut socket)?; 1000 } 1001 1002 let vm_migration_config = VmMigrationConfig { 1003 vm_config, 1004 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1005 common_cpuid, 1006 memory_manager_data: vm.memory_manager_data(), 1007 }; 1008 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1009 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1010 socket 1011 .write_all(&config_data) 1012 .map_err(MigratableError::MigrateSocket)?; 1013 Response::read_from(&mut socket)?.ok_or_abandon( 1014 &mut socket, 1015 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1016 )?; 1017 1018 // Let every Migratable object know about the migration being started. 1019 vm.start_migration()?; 1020 1021 if send_data_migration.local { 1022 // Now pause VM 1023 vm.pause()?; 1024 } else { 1025 // Start logging dirty pages 1026 vm.start_dirty_log()?; 1027 1028 // Send memory table 1029 let table = vm.memory_range_table()?; 1030 Request::memory(table.length()) 1031 .write_to(&mut socket) 1032 .unwrap(); 1033 table.write_to(&mut socket)?; 1034 // And then the memory itself 1035 vm.send_memory_regions(&table, &mut socket)?; 1036 Response::read_from(&mut socket)?.ok_or_abandon( 1037 &mut socket, 1038 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1039 )?; 1040 1041 // Try at most 5 passes of dirty memory sending 1042 const MAX_DIRTY_MIGRATIONS: usize = 5; 1043 for i in 0..MAX_DIRTY_MIGRATIONS { 1044 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1045 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1046 break; 1047 } 1048 } 1049 1050 // Now pause VM 1051 vm.pause()?; 1052 1053 // Send last batch of dirty pages 1054 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1055 1056 // Stop logging dirty pages 1057 vm.stop_dirty_log()?; 1058 } 1059 // Capture snapshot and send it 1060 let vm_snapshot = vm.snapshot()?; 1061 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1062 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1063 socket 1064 .write_all(&snapshot_data) 1065 .map_err(MigratableError::MigrateSocket)?; 1066 Response::read_from(&mut socket)?.ok_or_abandon( 1067 &mut socket, 1068 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1069 )?; 1070 // Complete the migration 1071 Request::complete().write_to(&mut socket)?; 1072 Response::read_from(&mut socket)?.ok_or_abandon( 1073 &mut socket, 1074 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1075 )?; 1076 1077 info!("Migration complete"); 1078 1079 // Let every Migratable object know about the migration being complete 1080 vm.complete_migration() 1081 } 1082 1083 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1084 fn vm_check_cpuid_compatibility( 1085 &self, 1086 src_vm_config: &Arc<Mutex<VmConfig>>, 1087 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1088 ) -> result::Result<(), MigratableError> { 1089 #[cfg(feature = "tdx")] 1090 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1091 return Err(MigratableError::MigrateReceive(anyhow!( 1092 "Live Migration is not supported when TDX is enabled" 1093 ))); 1094 }; 1095 1096 // We check the `CPUID` compatibility of between the source vm and destination, which is 1097 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1098 let dest_cpuid = &{ 1099 let vm_config = &src_vm_config.lock().unwrap(); 1100 1101 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1102 arch::generate_common_cpuid( 1103 &self.hypervisor.clone(), 1104 &arch::CpuidConfig { 1105 sgx_epc_sections: None, 1106 phys_bits, 1107 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1108 #[cfg(feature = "tdx")] 1109 tdx: false, 1110 amx: vm_config.cpus.features.amx, 1111 }, 1112 ) 1113 .map_err(|e| { 1114 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1115 })? 1116 }; 1117 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1118 MigratableError::MigrateReceive(anyhow!( 1119 "Error checking cpu feature compatibility': {:?}", 1120 e 1121 )) 1122 }) 1123 } 1124 1125 fn control_loop( 1126 &mut self, 1127 api_receiver: Rc<Receiver<ApiRequest>>, 1128 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1129 ) -> Result<()> { 1130 const EPOLL_EVENTS_LEN: usize = 100; 1131 1132 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1133 let epoll_fd = self.epoll.as_raw_fd(); 1134 1135 'outer: loop { 1136 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1137 Ok(res) => res, 1138 Err(e) => { 1139 if e.kind() == io::ErrorKind::Interrupted { 1140 // It's well defined from the epoll_wait() syscall 1141 // documentation that the epoll loop can be interrupted 1142 // before any of the requested events occurred or the 1143 // timeout expired. In both those cases, epoll_wait() 1144 // returns an error of type EINTR, but this should not 1145 // be considered as a regular error. Instead it is more 1146 // appropriate to retry, by calling into epoll_wait(). 1147 continue; 1148 } 1149 return Err(Error::Epoll(e)); 1150 } 1151 }; 1152 1153 for event in events.iter().take(num_events) { 1154 let dispatch_event: EpollDispatch = event.data.into(); 1155 match dispatch_event { 1156 EpollDispatch::Unknown => { 1157 let event = event.data; 1158 warn!("Unknown VMM loop event: {}", event); 1159 } 1160 EpollDispatch::Exit => { 1161 info!("VM exit event"); 1162 // Consume the event. 1163 self.exit_evt.read().map_err(Error::EventFdRead)?; 1164 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1165 1166 break 'outer; 1167 } 1168 EpollDispatch::Reset => { 1169 info!("VM reset event"); 1170 // Consume the event. 1171 self.reset_evt.read().map_err(Error::EventFdRead)?; 1172 self.vm_reboot().map_err(Error::VmReboot)?; 1173 } 1174 EpollDispatch::ActivateVirtioDevices => { 1175 if let Some(ref vm) = self.vm { 1176 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1177 info!( 1178 "Trying to activate pending virtio devices: count = {}", 1179 count 1180 ); 1181 vm.activate_virtio_devices() 1182 .map_err(Error::ActivateVirtioDevices)?; 1183 } 1184 } 1185 EpollDispatch::Api => { 1186 // Consume the events. 1187 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1188 // Read from the API receiver channel 1189 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1190 1191 if api_request(self)? { 1192 break 'outer; 1193 } 1194 } 1195 } 1196 #[cfg(feature = "guest_debug")] 1197 EpollDispatch::Debug => { 1198 // Consume the events. 1199 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1200 // Read from the API receiver channel 1201 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1202 1203 let response = if let Some(ref mut vm) = self.vm { 1204 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1205 } else { 1206 Err(VmError::VmNotRunning) 1207 } 1208 .map_err(gdb::Error::Vm); 1209 1210 gdb_request 1211 .sender 1212 .send(response) 1213 .map_err(Error::GdbResponseSend)?; 1214 } 1215 } 1216 #[cfg(not(feature = "guest_debug"))] 1217 EpollDispatch::Debug => {} 1218 } 1219 } 1220 } 1221 1222 // Trigger the termination of the signal_handler thread 1223 if let Some(signals) = self.signals.take() { 1224 signals.close(); 1225 } 1226 1227 // Wait for all the threads to finish 1228 for thread in self.threads.drain(..) { 1229 thread.join().map_err(Error::ThreadCleanup)? 1230 } 1231 1232 Ok(()) 1233 } 1234 } 1235 1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1237 vm_config.lock().unwrap().apply_landlock()?; 1238 Ok(()) 1239 } 1240 1241 impl RequestHandler for Vmm { 1242 fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> { 1243 // We only store the passed VM config. 1244 // The VM will be created when being asked to boot it. 1245 if self.vm_config.is_none() { 1246 self.vm_config = Some(config); 1247 self.console_info = 1248 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1249 1250 if self 1251 .vm_config 1252 .as_ref() 1253 .unwrap() 1254 .lock() 1255 .unwrap() 1256 .landlock_enable 1257 { 1258 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1259 .map_err(VmError::ApplyLandlock)?; 1260 } 1261 Ok(()) 1262 } else { 1263 Err(VmError::VmAlreadyCreated) 1264 } 1265 } 1266 1267 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1268 tracer::start(); 1269 info!("Booting VM"); 1270 event!("vm", "booting"); 1271 let r = { 1272 trace_scoped!("vm_boot"); 1273 // If we don't have a config, we cannot boot a VM. 1274 if self.vm_config.is_none() { 1275 return Err(VmError::VmMissingConfig); 1276 }; 1277 1278 // console_info is set to None in vm_shutdown. re-populate here if empty 1279 if self.console_info.is_none() { 1280 self.console_info = 1281 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1282 } 1283 1284 // Create a new VM if we don't have one yet. 1285 if self.vm.is_none() { 1286 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1287 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1288 #[cfg(feature = "guest_debug")] 1289 let vm_debug_evt = self 1290 .vm_debug_evt 1291 .try_clone() 1292 .map_err(VmError::EventFdClone)?; 1293 let activate_evt = self 1294 .activate_evt 1295 .try_clone() 1296 .map_err(VmError::EventFdClone)?; 1297 1298 if let Some(ref vm_config) = self.vm_config { 1299 let vm = Vm::new( 1300 Arc::clone(vm_config), 1301 exit_evt, 1302 reset_evt, 1303 #[cfg(feature = "guest_debug")] 1304 vm_debug_evt, 1305 &self.seccomp_action, 1306 self.hypervisor.clone(), 1307 activate_evt, 1308 self.console_info.clone(), 1309 None, 1310 Arc::clone(&self.original_termios_opt), 1311 None, 1312 None, 1313 None, 1314 )?; 1315 1316 self.vm = Some(vm); 1317 } 1318 } 1319 1320 // Now we can boot the VM. 1321 if let Some(ref mut vm) = self.vm { 1322 vm.boot() 1323 } else { 1324 Err(VmError::VmNotCreated) 1325 } 1326 }; 1327 tracer::end(); 1328 if r.is_ok() { 1329 event!("vm", "booted"); 1330 } 1331 r 1332 } 1333 1334 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1335 if let Some(ref mut vm) = self.vm { 1336 vm.pause().map_err(VmError::Pause) 1337 } else { 1338 Err(VmError::VmNotRunning) 1339 } 1340 } 1341 1342 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1343 if let Some(ref mut vm) = self.vm { 1344 vm.resume().map_err(VmError::Resume) 1345 } else { 1346 Err(VmError::VmNotRunning) 1347 } 1348 } 1349 1350 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1351 if let Some(ref mut vm) = self.vm { 1352 // Drain console_info so that FDs are not reused 1353 let _ = self.console_info.take(); 1354 vm.snapshot() 1355 .map_err(VmError::Snapshot) 1356 .and_then(|snapshot| { 1357 vm.send(&snapshot, destination_url) 1358 .map_err(VmError::SnapshotSend) 1359 }) 1360 } else { 1361 Err(VmError::VmNotRunning) 1362 } 1363 } 1364 1365 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1366 if self.vm.is_some() || self.vm_config.is_some() { 1367 return Err(VmError::VmAlreadyCreated); 1368 } 1369 1370 let source_url = restore_cfg.source_url.as_path().to_str(); 1371 if source_url.is_none() { 1372 return Err(VmError::InvalidRestoreSourceUrl); 1373 } 1374 // Safe to unwrap as we checked it was Some(&str). 1375 let source_url = source_url.unwrap(); 1376 1377 let vm_config = Arc::new(Mutex::new( 1378 recv_vm_config(source_url).map_err(VmError::Restore)?, 1379 )); 1380 restore_cfg 1381 .validate(&vm_config.lock().unwrap().clone()) 1382 .map_err(VmError::ConfigValidation)?; 1383 1384 // Update VM's net configurations with new fds received for restore operation 1385 if let (Some(restored_nets), Some(vm_net_configs)) = 1386 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1387 { 1388 for net in restored_nets.iter() { 1389 for net_config in vm_net_configs.iter_mut() { 1390 // update only if the net dev is backed by FDs 1391 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1392 net_config.fds.clone_from(&net.fds); 1393 } 1394 } 1395 } 1396 } 1397 1398 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1399 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1400 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1401 1402 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1403 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1404 .map_err(VmError::Restore)?; 1405 1406 self.vm_config = Some(Arc::clone(&vm_config)); 1407 1408 // console_info is set to None in vm_snapshot. re-populate here if empty 1409 if self.console_info.is_none() { 1410 self.console_info = 1411 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1412 } 1413 1414 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1415 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1416 #[cfg(feature = "guest_debug")] 1417 let debug_evt = self 1418 .vm_debug_evt 1419 .try_clone() 1420 .map_err(VmError::EventFdClone)?; 1421 let activate_evt = self 1422 .activate_evt 1423 .try_clone() 1424 .map_err(VmError::EventFdClone)?; 1425 1426 let vm = Vm::new( 1427 vm_config, 1428 exit_evt, 1429 reset_evt, 1430 #[cfg(feature = "guest_debug")] 1431 debug_evt, 1432 &self.seccomp_action, 1433 self.hypervisor.clone(), 1434 activate_evt, 1435 self.console_info.clone(), 1436 None, 1437 Arc::clone(&self.original_termios_opt), 1438 Some(snapshot), 1439 Some(source_url), 1440 Some(restore_cfg.prefault), 1441 )?; 1442 self.vm = Some(vm); 1443 1444 if self 1445 .vm_config 1446 .as_ref() 1447 .unwrap() 1448 .lock() 1449 .unwrap() 1450 .landlock_enable 1451 { 1452 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1453 .map_err(VmError::ApplyLandlock)?; 1454 } 1455 1456 // Now we can restore the rest of the VM. 1457 if let Some(ref mut vm) = self.vm { 1458 vm.restore() 1459 } else { 1460 Err(VmError::VmNotCreated) 1461 } 1462 } 1463 1464 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1465 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1466 if let Some(ref mut vm) = self.vm { 1467 vm.coredump(destination_url).map_err(VmError::Coredump) 1468 } else { 1469 Err(VmError::VmNotRunning) 1470 } 1471 } 1472 1473 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1474 let r = if let Some(ref mut vm) = self.vm.take() { 1475 // Drain console_info so that the FDs are not reused 1476 let _ = self.console_info.take(); 1477 vm.shutdown() 1478 } else { 1479 Err(VmError::VmNotRunning) 1480 }; 1481 1482 if r.is_ok() { 1483 event!("vm", "shutdown"); 1484 } 1485 1486 r 1487 } 1488 1489 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1490 event!("vm", "rebooting"); 1491 1492 // First we stop the current VM 1493 let (config, console_resize_pipe) = if let Some(mut vm) = self.vm.take() { 1494 let config = vm.get_config(); 1495 let console_resize_pipe = vm 1496 .console_resize_pipe() 1497 .as_ref() 1498 .map(|pipe| pipe.try_clone().unwrap()); 1499 vm.shutdown()?; 1500 (config, console_resize_pipe) 1501 } else { 1502 return Err(VmError::VmNotCreated); 1503 }; 1504 1505 // vm.shutdown() closes all the console devices, so set console_info to None 1506 // so that the closed FD #s are not reused. 1507 let _ = self.console_info.take(); 1508 1509 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1510 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1511 #[cfg(feature = "guest_debug")] 1512 let debug_evt = self 1513 .vm_debug_evt 1514 .try_clone() 1515 .map_err(VmError::EventFdClone)?; 1516 let activate_evt = self 1517 .activate_evt 1518 .try_clone() 1519 .map_err(VmError::EventFdClone)?; 1520 1521 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1522 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1523 // during the boot process. 1524 if self.reset_evt.read().is_ok() { 1525 warn!("Spurious second reset event received. Ignoring."); 1526 } 1527 1528 self.console_info = 1529 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1530 1531 // Then we create the new VM 1532 let mut vm = Vm::new( 1533 config, 1534 exit_evt, 1535 reset_evt, 1536 #[cfg(feature = "guest_debug")] 1537 debug_evt, 1538 &self.seccomp_action, 1539 self.hypervisor.clone(), 1540 activate_evt, 1541 self.console_info.clone(), 1542 console_resize_pipe, 1543 Arc::clone(&self.original_termios_opt), 1544 None, 1545 None, 1546 None, 1547 )?; 1548 1549 // And we boot it 1550 vm.boot()?; 1551 1552 self.vm = Some(vm); 1553 1554 event!("vm", "rebooted"); 1555 1556 Ok(()) 1557 } 1558 1559 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1560 match &self.vm_config { 1561 Some(config) => { 1562 let state = match &self.vm { 1563 Some(vm) => vm.get_state()?, 1564 None => VmState::Created, 1565 }; 1566 1567 let config = Arc::clone(config); 1568 1569 let mut memory_actual_size = config.lock().unwrap().memory.total_size(); 1570 if let Some(vm) = &self.vm { 1571 memory_actual_size -= vm.balloon_size(); 1572 } 1573 1574 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree()); 1575 1576 Ok(VmInfoResponse { 1577 config, 1578 state, 1579 memory_actual_size, 1580 device_tree, 1581 }) 1582 } 1583 None => Err(VmError::VmNotCreated), 1584 } 1585 } 1586 1587 fn vmm_ping(&self) -> VmmPingResponse { 1588 let VmmVersionInfo { 1589 build_version, 1590 version, 1591 } = self.version.clone(); 1592 1593 VmmPingResponse { 1594 build_version, 1595 version, 1596 pid: std::process::id() as i64, 1597 features: feature_list(), 1598 } 1599 } 1600 1601 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1602 if self.vm_config.is_none() { 1603 return Ok(()); 1604 } 1605 1606 // If a VM is booted, we first try to shut it down. 1607 if self.vm.is_some() { 1608 self.vm_shutdown()?; 1609 } 1610 1611 self.vm_config = None; 1612 1613 event!("vm", "deleted"); 1614 1615 Ok(()) 1616 } 1617 1618 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1619 self.vm_delete()?; 1620 event!("vmm", "shutdown"); 1621 Ok(()) 1622 } 1623 1624 fn vm_resize( 1625 &mut self, 1626 desired_vcpus: Option<u8>, 1627 desired_ram: Option<u64>, 1628 desired_balloon: Option<u64>, 1629 ) -> result::Result<(), VmError> { 1630 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1631 1632 if let Some(ref mut vm) = self.vm { 1633 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1634 error!("Error when resizing VM: {:?}", e); 1635 Err(e) 1636 } else { 1637 Ok(()) 1638 } 1639 } else { 1640 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1641 if let Some(desired_vcpus) = desired_vcpus { 1642 config.cpus.boot_vcpus = desired_vcpus; 1643 } 1644 if let Some(desired_ram) = desired_ram { 1645 config.memory.size = desired_ram; 1646 } 1647 if let Some(desired_balloon) = desired_balloon { 1648 if let Some(balloon_config) = &mut config.balloon { 1649 balloon_config.size = desired_balloon; 1650 } 1651 } 1652 Ok(()) 1653 } 1654 } 1655 1656 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1657 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1658 1659 if let Some(ref mut vm) = self.vm { 1660 if let Err(e) = vm.resize_zone(id, desired_ram) { 1661 error!("Error when resizing VM: {:?}", e); 1662 Err(e) 1663 } else { 1664 Ok(()) 1665 } 1666 } else { 1667 // Update VmConfig by setting the new desired ram. 1668 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1669 1670 if let Some(zones) = &mut memory_config.zones { 1671 for zone in zones.iter_mut() { 1672 if zone.id == id { 1673 zone.size = desired_ram; 1674 return Ok(()); 1675 } 1676 } 1677 } 1678 1679 error!("Could not find the memory zone {} for the resize", id); 1680 Err(VmError::ResizeZone) 1681 } 1682 } 1683 1684 fn vm_add_device( 1685 &mut self, 1686 device_cfg: DeviceConfig, 1687 ) -> result::Result<Option<Vec<u8>>, VmError> { 1688 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1689 1690 { 1691 // Validate the configuration change in a cloned configuration 1692 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1693 add_to_config(&mut config.devices, device_cfg.clone()); 1694 config.validate().map_err(VmError::ConfigValidation)?; 1695 } 1696 1697 if let Some(ref mut vm) = self.vm { 1698 let info = vm.add_device(device_cfg).map_err(|e| { 1699 error!("Error when adding new device to the VM: {:?}", e); 1700 e 1701 })?; 1702 serde_json::to_vec(&info) 1703 .map(Some) 1704 .map_err(VmError::SerializeJson) 1705 } else { 1706 // Update VmConfig by adding the new device. 1707 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1708 add_to_config(&mut config.devices, device_cfg); 1709 Ok(None) 1710 } 1711 } 1712 1713 fn vm_add_user_device( 1714 &mut self, 1715 device_cfg: UserDeviceConfig, 1716 ) -> result::Result<Option<Vec<u8>>, VmError> { 1717 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1718 1719 { 1720 // Validate the configuration change in a cloned configuration 1721 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1722 add_to_config(&mut config.user_devices, device_cfg.clone()); 1723 config.validate().map_err(VmError::ConfigValidation)?; 1724 } 1725 1726 if let Some(ref mut vm) = self.vm { 1727 let info = vm.add_user_device(device_cfg).map_err(|e| { 1728 error!("Error when adding new user device to the VM: {:?}", e); 1729 e 1730 })?; 1731 serde_json::to_vec(&info) 1732 .map(Some) 1733 .map_err(VmError::SerializeJson) 1734 } else { 1735 // Update VmConfig by adding the new device. 1736 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1737 add_to_config(&mut config.user_devices, device_cfg); 1738 Ok(None) 1739 } 1740 } 1741 1742 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1743 if let Some(ref mut vm) = self.vm { 1744 if let Err(e) = vm.remove_device(id) { 1745 error!("Error when removing device from the VM: {:?}", e); 1746 Err(e) 1747 } else { 1748 Ok(()) 1749 } 1750 } else if let Some(ref config) = self.vm_config { 1751 let mut config = config.lock().unwrap(); 1752 if config.remove_device(&id) { 1753 Ok(()) 1754 } else { 1755 Err(VmError::NoDeviceToRemove(id)) 1756 } 1757 } else { 1758 Err(VmError::VmNotCreated) 1759 } 1760 } 1761 1762 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1763 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1764 1765 { 1766 // Validate the configuration change in a cloned configuration 1767 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1768 add_to_config(&mut config.disks, disk_cfg.clone()); 1769 config.validate().map_err(VmError::ConfigValidation)?; 1770 } 1771 1772 if let Some(ref mut vm) = self.vm { 1773 let info = vm.add_disk(disk_cfg).map_err(|e| { 1774 error!("Error when adding new disk to the VM: {:?}", e); 1775 e 1776 })?; 1777 serde_json::to_vec(&info) 1778 .map(Some) 1779 .map_err(VmError::SerializeJson) 1780 } else { 1781 // Update VmConfig by adding the new device. 1782 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1783 add_to_config(&mut config.disks, disk_cfg); 1784 Ok(None) 1785 } 1786 } 1787 1788 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1789 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1790 1791 { 1792 // Validate the configuration change in a cloned configuration 1793 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1794 add_to_config(&mut config.fs, fs_cfg.clone()); 1795 config.validate().map_err(VmError::ConfigValidation)?; 1796 } 1797 1798 if let Some(ref mut vm) = self.vm { 1799 let info = vm.add_fs(fs_cfg).map_err(|e| { 1800 error!("Error when adding new fs to the VM: {:?}", e); 1801 e 1802 })?; 1803 serde_json::to_vec(&info) 1804 .map(Some) 1805 .map_err(VmError::SerializeJson) 1806 } else { 1807 // Update VmConfig by adding the new device. 1808 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1809 add_to_config(&mut config.fs, fs_cfg); 1810 Ok(None) 1811 } 1812 } 1813 1814 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1815 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1816 1817 { 1818 // Validate the configuration change in a cloned configuration 1819 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1820 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1821 config.validate().map_err(VmError::ConfigValidation)?; 1822 } 1823 1824 if let Some(ref mut vm) = self.vm { 1825 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1826 error!("Error when adding new pmem device to the VM: {:?}", e); 1827 e 1828 })?; 1829 serde_json::to_vec(&info) 1830 .map(Some) 1831 .map_err(VmError::SerializeJson) 1832 } else { 1833 // Update VmConfig by adding the new device. 1834 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1835 add_to_config(&mut config.pmem, pmem_cfg); 1836 Ok(None) 1837 } 1838 } 1839 1840 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1841 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1842 1843 { 1844 // Validate the configuration change in a cloned configuration 1845 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1846 add_to_config(&mut config.net, net_cfg.clone()); 1847 config.validate().map_err(VmError::ConfigValidation)?; 1848 } 1849 1850 if let Some(ref mut vm) = self.vm { 1851 let info = vm.add_net(net_cfg).map_err(|e| { 1852 error!("Error when adding new network device to the VM: {:?}", e); 1853 e 1854 })?; 1855 serde_json::to_vec(&info) 1856 .map(Some) 1857 .map_err(VmError::SerializeJson) 1858 } else { 1859 // Update VmConfig by adding the new device. 1860 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1861 add_to_config(&mut config.net, net_cfg); 1862 Ok(None) 1863 } 1864 } 1865 1866 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1867 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1868 1869 { 1870 // Validate the configuration change in a cloned configuration 1871 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1872 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1873 config.validate().map_err(VmError::ConfigValidation)?; 1874 } 1875 1876 if let Some(ref mut vm) = self.vm { 1877 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1878 error!("Error when adding new vDPA device to the VM: {:?}", e); 1879 e 1880 })?; 1881 serde_json::to_vec(&info) 1882 .map(Some) 1883 .map_err(VmError::SerializeJson) 1884 } else { 1885 // Update VmConfig by adding the new device. 1886 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1887 add_to_config(&mut config.vdpa, vdpa_cfg); 1888 Ok(None) 1889 } 1890 } 1891 1892 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1893 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1894 1895 { 1896 // Validate the configuration change in a cloned configuration 1897 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1898 1899 if config.vsock.is_some() { 1900 return Err(VmError::TooManyVsockDevices); 1901 } 1902 1903 config.vsock = Some(vsock_cfg.clone()); 1904 config.validate().map_err(VmError::ConfigValidation)?; 1905 } 1906 1907 if let Some(ref mut vm) = self.vm { 1908 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1909 error!("Error when adding new vsock device to the VM: {:?}", e); 1910 e 1911 })?; 1912 serde_json::to_vec(&info) 1913 .map(Some) 1914 .map_err(VmError::SerializeJson) 1915 } else { 1916 // Update VmConfig by adding the new device. 1917 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1918 config.vsock = Some(vsock_cfg); 1919 Ok(None) 1920 } 1921 } 1922 1923 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1924 if let Some(ref mut vm) = self.vm { 1925 let info = vm.counters().map_err(|e| { 1926 error!("Error when getting counters from the VM: {:?}", e); 1927 e 1928 })?; 1929 serde_json::to_vec(&info) 1930 .map(Some) 1931 .map_err(VmError::SerializeJson) 1932 } else { 1933 Err(VmError::VmNotRunning) 1934 } 1935 } 1936 1937 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1938 if let Some(ref mut vm) = self.vm { 1939 vm.power_button() 1940 } else { 1941 Err(VmError::VmNotRunning) 1942 } 1943 } 1944 1945 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 1946 if let Some(ref mut vm) = self.vm { 1947 vm.nmi() 1948 } else { 1949 Err(VmError::VmNotRunning) 1950 } 1951 } 1952 1953 fn vm_receive_migration( 1954 &mut self, 1955 receive_data_migration: VmReceiveMigrationData, 1956 ) -> result::Result<(), MigratableError> { 1957 info!( 1958 "Receiving migration: receiver_url = {}", 1959 receive_data_migration.receiver_url 1960 ); 1961 1962 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1963 let listener = UnixListener::bind(&path).map_err(|e| { 1964 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1965 })?; 1966 let (mut socket, _addr) = listener.accept().map_err(|e| { 1967 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1968 })?; 1969 std::fs::remove_file(&path).map_err(|e| { 1970 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1971 })?; 1972 1973 let mut started = false; 1974 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1975 let mut existing_memory_files = None; 1976 loop { 1977 let req = Request::read_from(&mut socket)?; 1978 match req.command() { 1979 Command::Invalid => info!("Invalid Command Received"), 1980 Command::Start => { 1981 info!("Start Command Received"); 1982 started = true; 1983 1984 Response::ok().write_to(&mut socket)?; 1985 } 1986 Command::Config => { 1987 info!("Config Command Received"); 1988 1989 if !started { 1990 warn!("Migration not started yet"); 1991 Response::error().write_to(&mut socket)?; 1992 continue; 1993 } 1994 memory_manager = Some(self.vm_receive_config( 1995 &req, 1996 &mut socket, 1997 existing_memory_files.take(), 1998 )?); 1999 } 2000 Command::State => { 2001 info!("State Command Received"); 2002 2003 if !started { 2004 warn!("Migration not started yet"); 2005 Response::error().write_to(&mut socket)?; 2006 continue; 2007 } 2008 if let Some(mm) = memory_manager.take() { 2009 self.vm_receive_state(&req, &mut socket, mm)?; 2010 } else { 2011 warn!("Configuration not sent yet"); 2012 Response::error().write_to(&mut socket)?; 2013 } 2014 } 2015 Command::Memory => { 2016 info!("Memory Command Received"); 2017 2018 if !started { 2019 warn!("Migration not started yet"); 2020 Response::error().write_to(&mut socket)?; 2021 continue; 2022 } 2023 if let Some(mm) = memory_manager.as_ref() { 2024 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2025 } else { 2026 warn!("Configuration not sent yet"); 2027 Response::error().write_to(&mut socket)?; 2028 } 2029 } 2030 Command::MemoryFd => { 2031 info!("MemoryFd Command Received"); 2032 2033 if !started { 2034 warn!("Migration not started yet"); 2035 Response::error().write_to(&mut socket)?; 2036 continue; 2037 } 2038 2039 let mut buf = [0u8; 4]; 2040 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 2041 MigratableError::MigrateReceive(anyhow!( 2042 "Error receiving slot from socket: {}", 2043 e 2044 )) 2045 })?; 2046 2047 if existing_memory_files.is_none() { 2048 existing_memory_files = Some(HashMap::default()) 2049 } 2050 2051 if let Some(ref mut existing_memory_files) = existing_memory_files { 2052 let slot = u32::from_le_bytes(buf); 2053 existing_memory_files.insert(slot, file.unwrap()); 2054 } 2055 2056 Response::ok().write_to(&mut socket)?; 2057 } 2058 Command::Complete => { 2059 info!("Complete Command Received"); 2060 if let Some(ref mut vm) = self.vm.as_mut() { 2061 vm.resume()?; 2062 Response::ok().write_to(&mut socket)?; 2063 } else { 2064 warn!("VM not created yet"); 2065 Response::error().write_to(&mut socket)?; 2066 } 2067 break; 2068 } 2069 Command::Abandon => { 2070 info!("Abandon Command Received"); 2071 self.vm = None; 2072 self.vm_config = None; 2073 Response::ok().write_to(&mut socket).ok(); 2074 break; 2075 } 2076 } 2077 } 2078 2079 Ok(()) 2080 } 2081 2082 fn vm_send_migration( 2083 &mut self, 2084 send_data_migration: VmSendMigrationData, 2085 ) -> result::Result<(), MigratableError> { 2086 info!( 2087 "Sending migration: destination_url = {}, local = {}", 2088 send_data_migration.destination_url, send_data_migration.local 2089 ); 2090 2091 if !self 2092 .vm_config 2093 .as_ref() 2094 .unwrap() 2095 .lock() 2096 .unwrap() 2097 .backed_by_shared_memory() 2098 && send_data_migration.local 2099 { 2100 return Err(MigratableError::MigrateSend(anyhow!( 2101 "Local migration requires shared memory or hugepages enabled" 2102 ))); 2103 } 2104 2105 if let Some(vm) = self.vm.as_mut() { 2106 Self::send_migration( 2107 vm, 2108 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2109 self.hypervisor.clone(), 2110 send_data_migration, 2111 ) 2112 .map_err(|migration_err| { 2113 error!("Migration failed: {:?}", migration_err); 2114 2115 // Stop logging dirty pages 2116 if let Err(e) = vm.stop_dirty_log() { 2117 return e; 2118 } 2119 2120 if vm.get_state().unwrap() == VmState::Paused { 2121 if let Err(e) = vm.resume() { 2122 return e; 2123 } 2124 } 2125 2126 migration_err 2127 })?; 2128 2129 // Shutdown the VM after the migration succeeded 2130 self.exit_evt.write(1).map_err(|e| { 2131 MigratableError::MigrateSend(anyhow!( 2132 "Failed shutting down the VM after migration: {:?}", 2133 e 2134 )) 2135 }) 2136 } else { 2137 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2138 } 2139 } 2140 } 2141 2142 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2143 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2144 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2145 2146 #[cfg(test)] 2147 mod unit_tests { 2148 use super::*; 2149 #[cfg(target_arch = "x86_64")] 2150 use crate::config::DebugConsoleConfig; 2151 use config::{ 2152 ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig, 2153 RngConfig, 2154 }; 2155 2156 fn create_dummy_vmm() -> Vmm { 2157 Vmm::new( 2158 VmmVersionInfo::new("dummy", "dummy"), 2159 EventFd::new(EFD_NONBLOCK).unwrap(), 2160 #[cfg(feature = "guest_debug")] 2161 EventFd::new(EFD_NONBLOCK).unwrap(), 2162 #[cfg(feature = "guest_debug")] 2163 EventFd::new(EFD_NONBLOCK).unwrap(), 2164 SeccompAction::Allow, 2165 hypervisor::new().unwrap(), 2166 EventFd::new(EFD_NONBLOCK).unwrap(), 2167 ) 2168 .unwrap() 2169 } 2170 2171 fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> { 2172 Arc::new(Mutex::new(VmConfig { 2173 cpus: CpusConfig { 2174 boot_vcpus: 1, 2175 max_vcpus: 1, 2176 topology: None, 2177 kvm_hyperv: false, 2178 max_phys_bits: 46, 2179 affinity: None, 2180 features: config::CpuFeatures::default(), 2181 }, 2182 memory: MemoryConfig { 2183 size: 536_870_912, 2184 mergeable: false, 2185 hotplug_method: HotplugMethod::Acpi, 2186 hotplug_size: None, 2187 hotplugged_size: None, 2188 shared: true, 2189 hugepages: false, 2190 hugepage_size: None, 2191 prefault: false, 2192 zones: None, 2193 thp: true, 2194 }, 2195 payload: Some(PayloadConfig { 2196 kernel: Some(PathBuf::from("/path/to/kernel")), 2197 firmware: None, 2198 cmdline: None, 2199 initramfs: None, 2200 #[cfg(feature = "igvm")] 2201 igvm: None, 2202 #[cfg(feature = "sev_snp")] 2203 host_data: None, 2204 }), 2205 rate_limit_groups: None, 2206 disks: None, 2207 net: None, 2208 rng: RngConfig { 2209 src: PathBuf::from("/dev/urandom"), 2210 iommu: false, 2211 }, 2212 balloon: None, 2213 fs: None, 2214 pmem: None, 2215 serial: ConsoleConfig { 2216 file: None, 2217 mode: ConsoleOutputMode::Null, 2218 iommu: false, 2219 socket: None, 2220 }, 2221 console: ConsoleConfig { 2222 file: None, 2223 mode: ConsoleOutputMode::Tty, 2224 iommu: false, 2225 socket: None, 2226 }, 2227 #[cfg(target_arch = "x86_64")] 2228 debug_console: DebugConsoleConfig::default(), 2229 devices: None, 2230 user_devices: None, 2231 vdpa: None, 2232 vsock: None, 2233 #[cfg(feature = "pvmemcontrol")] 2234 pvmemcontrol: None, 2235 pvpanic: false, 2236 iommu: false, 2237 #[cfg(target_arch = "x86_64")] 2238 sgx_epc: None, 2239 numa: None, 2240 watchdog: false, 2241 #[cfg(feature = "guest_debug")] 2242 gdb: false, 2243 pci_segments: None, 2244 platform: None, 2245 tpm: None, 2246 preserved_fds: None, 2247 landlock_enable: false, 2248 landlock_rules: None, 2249 })) 2250 } 2251 2252 #[test] 2253 fn test_vmm_vm_create() { 2254 let mut vmm = create_dummy_vmm(); 2255 let config = create_dummy_vm_config(); 2256 2257 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2258 assert!(matches!( 2259 vmm.vm_create(config), 2260 Err(VmError::VmAlreadyCreated) 2261 )); 2262 } 2263 2264 #[test] 2265 fn test_vmm_vm_cold_add_device() { 2266 let mut vmm = create_dummy_vmm(); 2267 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2268 2269 assert!(matches!( 2270 vmm.vm_add_device(device_config.clone()), 2271 Err(VmError::VmNotCreated) 2272 )); 2273 2274 let _ = vmm.vm_create(create_dummy_vm_config()); 2275 assert!(vmm 2276 .vm_config 2277 .as_ref() 2278 .unwrap() 2279 .lock() 2280 .unwrap() 2281 .devices 2282 .is_none()); 2283 2284 let result = vmm.vm_add_device(device_config.clone()); 2285 assert!(result.is_ok()); 2286 assert!(result.unwrap().is_none()); 2287 assert_eq!( 2288 vmm.vm_config 2289 .as_ref() 2290 .unwrap() 2291 .lock() 2292 .unwrap() 2293 .devices 2294 .clone() 2295 .unwrap() 2296 .len(), 2297 1 2298 ); 2299 assert_eq!( 2300 vmm.vm_config 2301 .as_ref() 2302 .unwrap() 2303 .lock() 2304 .unwrap() 2305 .devices 2306 .clone() 2307 .unwrap()[0], 2308 device_config 2309 ); 2310 } 2311 2312 #[test] 2313 fn test_vmm_vm_cold_add_user_device() { 2314 let mut vmm = create_dummy_vmm(); 2315 let user_device_config = 2316 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2317 2318 assert!(matches!( 2319 vmm.vm_add_user_device(user_device_config.clone()), 2320 Err(VmError::VmNotCreated) 2321 )); 2322 2323 let _ = vmm.vm_create(create_dummy_vm_config()); 2324 assert!(vmm 2325 .vm_config 2326 .as_ref() 2327 .unwrap() 2328 .lock() 2329 .unwrap() 2330 .user_devices 2331 .is_none()); 2332 2333 let result = vmm.vm_add_user_device(user_device_config.clone()); 2334 assert!(result.is_ok()); 2335 assert!(result.unwrap().is_none()); 2336 assert_eq!( 2337 vmm.vm_config 2338 .as_ref() 2339 .unwrap() 2340 .lock() 2341 .unwrap() 2342 .user_devices 2343 .clone() 2344 .unwrap() 2345 .len(), 2346 1 2347 ); 2348 assert_eq!( 2349 vmm.vm_config 2350 .as_ref() 2351 .unwrap() 2352 .lock() 2353 .unwrap() 2354 .user_devices 2355 .clone() 2356 .unwrap()[0], 2357 user_device_config 2358 ); 2359 } 2360 2361 #[test] 2362 fn test_vmm_vm_cold_add_disk() { 2363 let mut vmm = create_dummy_vmm(); 2364 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2365 2366 assert!(matches!( 2367 vmm.vm_add_disk(disk_config.clone()), 2368 Err(VmError::VmNotCreated) 2369 )); 2370 2371 let _ = vmm.vm_create(create_dummy_vm_config()); 2372 assert!(vmm 2373 .vm_config 2374 .as_ref() 2375 .unwrap() 2376 .lock() 2377 .unwrap() 2378 .disks 2379 .is_none()); 2380 2381 let result = vmm.vm_add_disk(disk_config.clone()); 2382 assert!(result.is_ok()); 2383 assert!(result.unwrap().is_none()); 2384 assert_eq!( 2385 vmm.vm_config 2386 .as_ref() 2387 .unwrap() 2388 .lock() 2389 .unwrap() 2390 .disks 2391 .clone() 2392 .unwrap() 2393 .len(), 2394 1 2395 ); 2396 assert_eq!( 2397 vmm.vm_config 2398 .as_ref() 2399 .unwrap() 2400 .lock() 2401 .unwrap() 2402 .disks 2403 .clone() 2404 .unwrap()[0], 2405 disk_config 2406 ); 2407 } 2408 2409 #[test] 2410 fn test_vmm_vm_cold_add_fs() { 2411 let mut vmm = create_dummy_vmm(); 2412 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2413 2414 assert!(matches!( 2415 vmm.vm_add_fs(fs_config.clone()), 2416 Err(VmError::VmNotCreated) 2417 )); 2418 2419 let _ = vmm.vm_create(create_dummy_vm_config()); 2420 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2421 2422 let result = vmm.vm_add_fs(fs_config.clone()); 2423 assert!(result.is_ok()); 2424 assert!(result.unwrap().is_none()); 2425 assert_eq!( 2426 vmm.vm_config 2427 .as_ref() 2428 .unwrap() 2429 .lock() 2430 .unwrap() 2431 .fs 2432 .clone() 2433 .unwrap() 2434 .len(), 2435 1 2436 ); 2437 assert_eq!( 2438 vmm.vm_config 2439 .as_ref() 2440 .unwrap() 2441 .lock() 2442 .unwrap() 2443 .fs 2444 .clone() 2445 .unwrap()[0], 2446 fs_config 2447 ); 2448 } 2449 2450 #[test] 2451 fn test_vmm_vm_cold_add_pmem() { 2452 let mut vmm = create_dummy_vmm(); 2453 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2454 2455 assert!(matches!( 2456 vmm.vm_add_pmem(pmem_config.clone()), 2457 Err(VmError::VmNotCreated) 2458 )); 2459 2460 let _ = vmm.vm_create(create_dummy_vm_config()); 2461 assert!(vmm 2462 .vm_config 2463 .as_ref() 2464 .unwrap() 2465 .lock() 2466 .unwrap() 2467 .pmem 2468 .is_none()); 2469 2470 let result = vmm.vm_add_pmem(pmem_config.clone()); 2471 assert!(result.is_ok()); 2472 assert!(result.unwrap().is_none()); 2473 assert_eq!( 2474 vmm.vm_config 2475 .as_ref() 2476 .unwrap() 2477 .lock() 2478 .unwrap() 2479 .pmem 2480 .clone() 2481 .unwrap() 2482 .len(), 2483 1 2484 ); 2485 assert_eq!( 2486 vmm.vm_config 2487 .as_ref() 2488 .unwrap() 2489 .lock() 2490 .unwrap() 2491 .pmem 2492 .clone() 2493 .unwrap()[0], 2494 pmem_config 2495 ); 2496 } 2497 2498 #[test] 2499 fn test_vmm_vm_cold_add_net() { 2500 let mut vmm = create_dummy_vmm(); 2501 let net_config = NetConfig::parse( 2502 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2503 ) 2504 .unwrap(); 2505 2506 assert!(matches!( 2507 vmm.vm_add_net(net_config.clone()), 2508 Err(VmError::VmNotCreated) 2509 )); 2510 2511 let _ = vmm.vm_create(create_dummy_vm_config()); 2512 assert!(vmm 2513 .vm_config 2514 .as_ref() 2515 .unwrap() 2516 .lock() 2517 .unwrap() 2518 .net 2519 .is_none()); 2520 2521 let result = vmm.vm_add_net(net_config.clone()); 2522 assert!(result.is_ok()); 2523 assert!(result.unwrap().is_none()); 2524 assert_eq!( 2525 vmm.vm_config 2526 .as_ref() 2527 .unwrap() 2528 .lock() 2529 .unwrap() 2530 .net 2531 .clone() 2532 .unwrap() 2533 .len(), 2534 1 2535 ); 2536 assert_eq!( 2537 vmm.vm_config 2538 .as_ref() 2539 .unwrap() 2540 .lock() 2541 .unwrap() 2542 .net 2543 .clone() 2544 .unwrap()[0], 2545 net_config 2546 ); 2547 } 2548 2549 #[test] 2550 fn test_vmm_vm_cold_add_vdpa() { 2551 let mut vmm = create_dummy_vmm(); 2552 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2553 2554 assert!(matches!( 2555 vmm.vm_add_vdpa(vdpa_config.clone()), 2556 Err(VmError::VmNotCreated) 2557 )); 2558 2559 let _ = vmm.vm_create(create_dummy_vm_config()); 2560 assert!(vmm 2561 .vm_config 2562 .as_ref() 2563 .unwrap() 2564 .lock() 2565 .unwrap() 2566 .vdpa 2567 .is_none()); 2568 2569 let result = vmm.vm_add_vdpa(vdpa_config.clone()); 2570 assert!(result.is_ok()); 2571 assert!(result.unwrap().is_none()); 2572 assert_eq!( 2573 vmm.vm_config 2574 .as_ref() 2575 .unwrap() 2576 .lock() 2577 .unwrap() 2578 .vdpa 2579 .clone() 2580 .unwrap() 2581 .len(), 2582 1 2583 ); 2584 assert_eq!( 2585 vmm.vm_config 2586 .as_ref() 2587 .unwrap() 2588 .lock() 2589 .unwrap() 2590 .vdpa 2591 .clone() 2592 .unwrap()[0], 2593 vdpa_config 2594 ); 2595 } 2596 2597 #[test] 2598 fn test_vmm_vm_cold_add_vsock() { 2599 let mut vmm = create_dummy_vmm(); 2600 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2601 2602 assert!(matches!( 2603 vmm.vm_add_vsock(vsock_config.clone()), 2604 Err(VmError::VmNotCreated) 2605 )); 2606 2607 let _ = vmm.vm_create(create_dummy_vm_config()); 2608 assert!(vmm 2609 .vm_config 2610 .as_ref() 2611 .unwrap() 2612 .lock() 2613 .unwrap() 2614 .vsock 2615 .is_none()); 2616 2617 let result = vmm.vm_add_vsock(vsock_config.clone()); 2618 assert!(result.is_ok()); 2619 assert!(result.unwrap().is_none()); 2620 assert_eq!( 2621 vmm.vm_config 2622 .as_ref() 2623 .unwrap() 2624 .lock() 2625 .unwrap() 2626 .vsock 2627 .clone() 2628 .unwrap(), 2629 vsock_config 2630 ); 2631 } 2632 } 2633