1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use crate::api::{ 12 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 13 VmSendMigrationData, VmmPingResponse, 14 }; 15 use crate::config::{ 16 add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, 17 UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, 18 }; 19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 20 use crate::coredump::GuestDebuggable; 21 use crate::landlock::Landlock; 22 use crate::memory_manager::MemoryManager; 23 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 24 use crate::migration::get_vm_snapshot; 25 use crate::migration::{recv_vm_config, recv_vm_state}; 26 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 27 use crate::vm::{Error as VmError, Vm, VmState}; 28 use anyhow::anyhow; 29 #[cfg(feature = "dbus_api")] 30 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 31 use api::http::HttpApiHandle; 32 use console_devices::{pre_create_console_devices, ConsoleInfo}; 33 use landlock::LandlockError; 34 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 35 use memory_manager::MemoryManagerSnapshotData; 36 use pci::PciBdf; 37 use seccompiler::{apply_filter, SeccompAction}; 38 use serde::ser::{SerializeStruct, Serializer}; 39 use serde::{Deserialize, Serialize}; 40 use signal_hook::iterator::{Handle, Signals}; 41 use std::collections::HashMap; 42 use std::fs::File; 43 use std::io; 44 use std::io::{stdout, Read, Write}; 45 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 46 use std::os::unix::net::UnixListener; 47 use std::os::unix::net::UnixStream; 48 use std::panic::AssertUnwindSafe; 49 use std::path::PathBuf; 50 use std::rc::Rc; 51 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 52 use std::sync::{Arc, Mutex}; 53 use std::time::Instant; 54 use std::{result, thread}; 55 use thiserror::Error; 56 use tracer::trace_scoped; 57 use vm_memory::bitmap::AtomicBitmap; 58 use vm_memory::{ReadVolatile, WriteVolatile}; 59 use vm_migration::{protocol::*, Migratable}; 60 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 61 use vmm_sys_util::eventfd::EventFd; 62 use vmm_sys_util::signal::unblock_signal; 63 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 64 65 mod acpi; 66 pub mod api; 67 mod clone3; 68 pub mod config; 69 pub mod console_devices; 70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 71 mod coredump; 72 pub mod cpu; 73 pub mod device_manager; 74 pub mod device_tree; 75 #[cfg(feature = "guest_debug")] 76 mod gdb; 77 #[cfg(feature = "igvm")] 78 mod igvm; 79 pub mod interrupt; 80 pub mod landlock; 81 pub mod memory_manager; 82 pub mod migration; 83 mod pci_segment; 84 pub mod seccomp_filters; 85 mod serial_manager; 86 mod sigwinch_listener; 87 pub mod vm; 88 pub mod vm_config; 89 90 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 91 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 92 93 /// Errors associated with VMM management 94 #[derive(Debug, Error)] 95 pub enum Error { 96 /// API request receive error 97 #[error("Error receiving API request: {0}")] 98 ApiRequestRecv(#[source] RecvError), 99 100 /// API response send error 101 #[error("Error sending API request: {0}")] 102 ApiResponseSend(#[source] SendError<ApiResponse>), 103 104 /// Cannot bind to the UNIX domain socket path 105 #[error("Error binding to UNIX domain socket: {0}")] 106 Bind(#[source] io::Error), 107 108 /// Cannot clone EventFd. 109 #[error("Error cloning EventFd: {0}")] 110 EventFdClone(#[source] io::Error), 111 112 /// Cannot create EventFd. 113 #[error("Error creating EventFd: {0}")] 114 EventFdCreate(#[source] io::Error), 115 116 /// Cannot read from EventFd. 117 #[error("Error reading from EventFd: {0}")] 118 EventFdRead(#[source] io::Error), 119 120 /// Cannot create epoll context. 121 #[error("Error creating epoll context: {0}")] 122 Epoll(#[source] io::Error), 123 124 /// Cannot create HTTP thread 125 #[error("Error spawning HTTP thread: {0}")] 126 HttpThreadSpawn(#[source] io::Error), 127 128 /// Cannot create D-Bus thread 129 #[cfg(feature = "dbus_api")] 130 #[error("Error spawning D-Bus thread: {0}")] 131 DBusThreadSpawn(#[source] io::Error), 132 133 /// Cannot start D-Bus session 134 #[cfg(feature = "dbus_api")] 135 #[error("Error starting D-Bus session: {0}")] 136 CreateDBusSession(#[source] zbus::Error), 137 138 /// Cannot create `event-monitor` thread 139 #[error("Error spawning `event-monitor` thread: {0}")] 140 EventMonitorThreadSpawn(#[source] io::Error), 141 142 /// Cannot handle the VM STDIN stream 143 #[error("Error handling VM stdin: {0:?}")] 144 Stdin(VmError), 145 146 /// Cannot handle the VM pty stream 147 #[error("Error handling VM pty: {0:?}")] 148 Pty(VmError), 149 150 /// Cannot reboot the VM 151 #[error("Error rebooting VM: {0:?}")] 152 VmReboot(VmError), 153 154 /// Cannot create VMM thread 155 #[error("Error spawning VMM thread {0:?}")] 156 VmmThreadSpawn(#[source] io::Error), 157 158 /// Cannot shut the VMM down 159 #[error("Error shutting down VMM: {0:?}")] 160 VmmShutdown(VmError), 161 162 /// Cannot create seccomp filter 163 #[error("Error creating seccomp filter: {0}")] 164 CreateSeccompFilter(seccompiler::Error), 165 166 /// Cannot apply seccomp filter 167 #[error("Error applying seccomp filter: {0}")] 168 ApplySeccompFilter(seccompiler::Error), 169 170 /// Error activating virtio devices 171 #[error("Error activating virtio devices: {0:?}")] 172 ActivateVirtioDevices(VmError), 173 174 /// Error creating API server 175 #[error("Error creating API server {0:?}")] 176 CreateApiServer(micro_http::ServerError), 177 178 /// Error binding API server socket 179 #[error("Error creation API server's socket {0:?}")] 180 CreateApiServerSocket(#[source] io::Error), 181 182 #[cfg(feature = "guest_debug")] 183 #[error("Failed to start the GDB thread: {0}")] 184 GdbThreadSpawn(io::Error), 185 186 /// GDB request receive error 187 #[cfg(feature = "guest_debug")] 188 #[error("Error receiving GDB request: {0}")] 189 GdbRequestRecv(#[source] RecvError), 190 191 /// GDB response send error 192 #[cfg(feature = "guest_debug")] 193 #[error("Error sending GDB request: {0}")] 194 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 195 196 #[error("Cannot spawn a signal handler thread: {0}")] 197 SignalHandlerSpawn(#[source] io::Error), 198 199 #[error("Failed to join on threads: {0:?}")] 200 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 201 202 /// Cannot create Landlock object 203 #[error("Error creating landlock object: {0}")] 204 CreateLandlock(LandlockError), 205 206 /// Cannot apply landlock based sandboxing 207 #[error("Error applying landlock: {0}")] 208 ApplyLandlock(LandlockError), 209 } 210 pub type Result<T> = result::Result<T, Error>; 211 212 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 213 #[repr(u64)] 214 pub enum EpollDispatch { 215 Exit = 0, 216 Reset = 1, 217 Api = 2, 218 ActivateVirtioDevices = 3, 219 Debug = 4, 220 Unknown, 221 } 222 223 impl From<u64> for EpollDispatch { 224 fn from(v: u64) -> Self { 225 use EpollDispatch::*; 226 match v { 227 0 => Exit, 228 1 => Reset, 229 2 => Api, 230 3 => ActivateVirtioDevices, 231 4 => Debug, 232 _ => Unknown, 233 } 234 } 235 } 236 237 pub struct EpollContext { 238 epoll_file: File, 239 } 240 241 impl EpollContext { 242 pub fn new() -> result::Result<EpollContext, io::Error> { 243 let epoll_fd = epoll::create(true)?; 244 // Use 'File' to enforce closing on 'epoll_fd' 245 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 246 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 247 248 Ok(EpollContext { epoll_file }) 249 } 250 251 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 252 where 253 T: AsRawFd, 254 { 255 let dispatch_index = token as u64; 256 epoll::ctl( 257 self.epoll_file.as_raw_fd(), 258 epoll::ControlOptions::EPOLL_CTL_ADD, 259 fd.as_raw_fd(), 260 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 261 )?; 262 263 Ok(()) 264 } 265 266 #[cfg(fuzzing)] 267 pub fn add_event_custom<T>( 268 &mut self, 269 fd: &T, 270 id: u64, 271 evts: epoll::Events, 272 ) -> result::Result<(), io::Error> 273 where 274 T: AsRawFd, 275 { 276 epoll::ctl( 277 self.epoll_file.as_raw_fd(), 278 epoll::ControlOptions::EPOLL_CTL_ADD, 279 fd.as_raw_fd(), 280 epoll::Event::new(evts, id), 281 )?; 282 283 Ok(()) 284 } 285 } 286 287 impl AsRawFd for EpollContext { 288 fn as_raw_fd(&self) -> RawFd { 289 self.epoll_file.as_raw_fd() 290 } 291 } 292 293 pub struct PciDeviceInfo { 294 pub id: String, 295 pub bdf: PciBdf, 296 } 297 298 impl Serialize for PciDeviceInfo { 299 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 300 where 301 S: Serializer, 302 { 303 let bdf_str = self.bdf.to_string(); 304 305 // Serialize the structure. 306 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 307 state.serialize_field("id", &self.id)?; 308 state.serialize_field("bdf", &bdf_str)?; 309 state.end() 310 } 311 } 312 313 pub fn feature_list() -> Vec<String> { 314 vec![ 315 #[cfg(feature = "dbus_api")] 316 "dbus_api".to_string(), 317 #[cfg(feature = "dhat-heap")] 318 "dhat-heap".to_string(), 319 #[cfg(feature = "guest_debug")] 320 "guest_debug".to_string(), 321 #[cfg(feature = "igvm")] 322 "igvm".to_string(), 323 #[cfg(feature = "io_uring")] 324 "io_uring".to_string(), 325 #[cfg(feature = "kvm")] 326 "kvm".to_string(), 327 #[cfg(feature = "mshv")] 328 "mshv".to_string(), 329 #[cfg(feature = "sev_snp")] 330 "sev_snp".to_string(), 331 #[cfg(feature = "tdx")] 332 "tdx".to_string(), 333 #[cfg(feature = "tracing")] 334 "tracing".to_string(), 335 ] 336 } 337 338 pub fn start_event_monitor_thread( 339 mut monitor: event_monitor::Monitor, 340 seccomp_action: &SeccompAction, 341 landlock_enable: bool, 342 hypervisor_type: hypervisor::HypervisorType, 343 exit_event: EventFd, 344 ) -> Result<thread::JoinHandle<Result<()>>> { 345 // Retrieve seccomp filter 346 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 347 .map_err(Error::CreateSeccompFilter)?; 348 349 thread::Builder::new() 350 .name("event-monitor".to_owned()) 351 .spawn(move || { 352 // Apply seccomp filter 353 if !seccomp_filter.is_empty() { 354 apply_filter(&seccomp_filter) 355 .map_err(Error::ApplySeccompFilter) 356 .map_err(|e| { 357 error!("Error applying seccomp filter: {:?}", e); 358 exit_event.write(1).ok(); 359 e 360 })?; 361 } 362 if landlock_enable { 363 Landlock::new() 364 .map_err(Error::CreateLandlock)? 365 .restrict_self() 366 .map_err(Error::ApplyLandlock) 367 .map_err(|e| { 368 error!("Error applying landlock to event monitor thread: {:?}", e); 369 exit_event.write(1).ok(); 370 e 371 })?; 372 } 373 374 std::panic::catch_unwind(AssertUnwindSafe(move || { 375 while let Ok(event) = monitor.rx.recv() { 376 let event = Arc::new(event); 377 378 if let Some(ref mut file) = monitor.file { 379 file.write_all(event.as_bytes().as_ref()).ok(); 380 file.write_all(b"\n\n").ok(); 381 } 382 383 for tx in monitor.broadcast.iter() { 384 tx.send(event.clone()).ok(); 385 } 386 } 387 })) 388 .map_err(|_| { 389 error!("`event-monitor` thread panicked"); 390 exit_event.write(1).ok(); 391 }) 392 .ok(); 393 394 Ok(()) 395 }) 396 .map_err(Error::EventMonitorThreadSpawn) 397 } 398 399 #[allow(unused_variables)] 400 #[allow(clippy::too_many_arguments)] 401 pub fn start_vmm_thread( 402 vmm_version: VmmVersionInfo, 403 http_path: &Option<String>, 404 http_fd: Option<RawFd>, 405 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 406 api_event: EventFd, 407 api_sender: Sender<ApiRequest>, 408 api_receiver: Receiver<ApiRequest>, 409 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 410 #[cfg(feature = "guest_debug")] debug_event: EventFd, 411 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 412 exit_event: EventFd, 413 seccomp_action: &SeccompAction, 414 hypervisor: Arc<dyn hypervisor::Hypervisor>, 415 landlock_enable: bool, 416 ) -> Result<VmmThreadHandle> { 417 #[cfg(feature = "guest_debug")] 418 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 419 #[cfg(feature = "guest_debug")] 420 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 421 #[cfg(feature = "guest_debug")] 422 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 423 #[cfg(feature = "guest_debug")] 424 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 425 426 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 427 let hypervisor_type = hypervisor.hypervisor_type(); 428 429 // Retrieve seccomp filter 430 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 431 .map_err(Error::CreateSeccompFilter)?; 432 433 let vmm_seccomp_action = seccomp_action.clone(); 434 let thread = { 435 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 436 thread::Builder::new() 437 .name("vmm".to_string()) 438 .spawn(move || { 439 // Apply seccomp filter for VMM thread. 440 if !vmm_seccomp_filter.is_empty() { 441 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 442 } 443 444 let mut vmm = Vmm::new( 445 vmm_version, 446 api_event, 447 #[cfg(feature = "guest_debug")] 448 debug_event, 449 #[cfg(feature = "guest_debug")] 450 vm_debug_event, 451 vmm_seccomp_action, 452 hypervisor, 453 exit_event, 454 )?; 455 456 vmm.setup_signal_handler(landlock_enable)?; 457 458 vmm.control_loop( 459 Rc::new(api_receiver), 460 #[cfg(feature = "guest_debug")] 461 Rc::new(gdb_receiver), 462 ) 463 }) 464 .map_err(Error::VmmThreadSpawn)? 465 }; 466 467 // The VMM thread is started, we can start the dbus thread 468 // and start serving HTTP requests 469 #[cfg(feature = "dbus_api")] 470 let dbus_shutdown_chs = match dbus_options { 471 Some(opts) => { 472 let (_, chs) = api::start_dbus_thread( 473 opts, 474 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 475 api_sender.clone(), 476 seccomp_action, 477 exit_event.try_clone().map_err(Error::EventFdClone)?, 478 hypervisor_type, 479 )?; 480 Some(chs) 481 } 482 None => None, 483 }; 484 485 let http_api_handle = if let Some(http_path) = http_path { 486 Some(api::start_http_path_thread( 487 http_path, 488 api_event_clone, 489 api_sender, 490 seccomp_action, 491 exit_event, 492 hypervisor_type, 493 landlock_enable, 494 )?) 495 } else if let Some(http_fd) = http_fd { 496 Some(api::start_http_fd_thread( 497 http_fd, 498 api_event_clone, 499 api_sender, 500 seccomp_action, 501 exit_event, 502 hypervisor_type, 503 landlock_enable, 504 )?) 505 } else { 506 None 507 }; 508 509 #[cfg(feature = "guest_debug")] 510 if let Some(debug_path) = debug_path { 511 let target = gdb::GdbStub::new( 512 gdb_sender, 513 gdb_debug_event, 514 gdb_vm_debug_event, 515 gdb_hw_breakpoints, 516 ); 517 thread::Builder::new() 518 .name("gdb".to_owned()) 519 .spawn(move || gdb::gdb_thread(target, &debug_path)) 520 .map_err(Error::GdbThreadSpawn)?; 521 } 522 523 Ok(VmmThreadHandle { 524 thread_handle: thread, 525 #[cfg(feature = "dbus_api")] 526 dbus_shutdown_chs, 527 http_api_handle, 528 }) 529 } 530 531 #[derive(Clone, Deserialize, Serialize)] 532 struct VmMigrationConfig { 533 vm_config: Arc<Mutex<VmConfig>>, 534 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 535 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 536 memory_manager_data: MemoryManagerSnapshotData, 537 } 538 539 #[derive(Debug, Clone)] 540 pub struct VmmVersionInfo { 541 pub build_version: String, 542 pub version: String, 543 } 544 545 impl VmmVersionInfo { 546 pub fn new(build_version: &str, version: &str) -> Self { 547 Self { 548 build_version: build_version.to_owned(), 549 version: version.to_owned(), 550 } 551 } 552 } 553 554 pub struct VmmThreadHandle { 555 pub thread_handle: thread::JoinHandle<Result<()>>, 556 #[cfg(feature = "dbus_api")] 557 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 558 pub http_api_handle: Option<HttpApiHandle>, 559 } 560 561 pub struct Vmm { 562 epoll: EpollContext, 563 exit_evt: EventFd, 564 reset_evt: EventFd, 565 api_evt: EventFd, 566 #[cfg(feature = "guest_debug")] 567 debug_evt: EventFd, 568 #[cfg(feature = "guest_debug")] 569 vm_debug_evt: EventFd, 570 version: VmmVersionInfo, 571 vm: Option<Vm>, 572 vm_config: Option<Arc<Mutex<VmConfig>>>, 573 seccomp_action: SeccompAction, 574 hypervisor: Arc<dyn hypervisor::Hypervisor>, 575 activate_evt: EventFd, 576 signals: Option<Handle>, 577 threads: Vec<thread::JoinHandle<()>>, 578 original_termios_opt: Arc<Mutex<Option<termios>>>, 579 console_resize_pipe: Option<Arc<File>>, 580 console_info: Option<ConsoleInfo>, 581 } 582 583 impl Vmm { 584 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 585 586 fn signal_handler( 587 mut signals: Signals, 588 original_termios_opt: Arc<Mutex<Option<termios>>>, 589 exit_evt: &EventFd, 590 ) { 591 for sig in &Self::HANDLED_SIGNALS { 592 unblock_signal(*sig).unwrap(); 593 } 594 595 for signal in signals.forever() { 596 match signal { 597 SIGTERM | SIGINT => { 598 if exit_evt.write(1).is_err() { 599 // Resetting the terminal is usually done as the VMM exits 600 if let Ok(lock) = original_termios_opt.lock() { 601 if let Some(termios) = *lock { 602 // SAFETY: FFI call 603 let _ = unsafe { 604 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 605 }; 606 } 607 } else { 608 warn!("Failed to lock original termios"); 609 } 610 611 std::process::exit(1); 612 } 613 } 614 _ => (), 615 } 616 } 617 } 618 619 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 620 let signals = Signals::new(Self::HANDLED_SIGNALS); 621 match signals { 622 Ok(signals) => { 623 self.signals = Some(signals.handle()); 624 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 625 let original_termios_opt = Arc::clone(&self.original_termios_opt); 626 627 let signal_handler_seccomp_filter = get_seccomp_filter( 628 &self.seccomp_action, 629 Thread::SignalHandler, 630 self.hypervisor.hypervisor_type(), 631 ) 632 .map_err(Error::CreateSeccompFilter)?; 633 self.threads.push( 634 thread::Builder::new() 635 .name("vmm_signal_handler".to_string()) 636 .spawn(move || { 637 if !signal_handler_seccomp_filter.is_empty() { 638 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 639 .map_err(Error::ApplySeccompFilter) 640 { 641 error!("Error applying seccomp filter: {:?}", e); 642 exit_evt.write(1).ok(); 643 return; 644 } 645 } 646 if landlock_enable{ 647 match Landlock::new() { 648 Ok(landlock) => { 649 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 650 error!("Error applying Landlock to signal handler thread: {:?}", e); 651 exit_evt.write(1).ok(); 652 }); 653 } 654 Err(e) => { 655 error!("Error creating Landlock object: {:?}", e); 656 exit_evt.write(1).ok(); 657 } 658 }; 659 } 660 661 std::panic::catch_unwind(AssertUnwindSafe(|| { 662 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 663 })) 664 .map_err(|_| { 665 error!("vmm signal_handler thread panicked"); 666 exit_evt.write(1).ok() 667 }) 668 .ok(); 669 }) 670 .map_err(Error::SignalHandlerSpawn)?, 671 ); 672 } 673 Err(e) => error!("Signal not found {}", e), 674 } 675 Ok(()) 676 } 677 678 #[allow(clippy::too_many_arguments)] 679 fn new( 680 vmm_version: VmmVersionInfo, 681 api_evt: EventFd, 682 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 683 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 684 seccomp_action: SeccompAction, 685 hypervisor: Arc<dyn hypervisor::Hypervisor>, 686 exit_evt: EventFd, 687 ) -> Result<Self> { 688 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 689 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 690 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 691 692 epoll 693 .add_event(&exit_evt, EpollDispatch::Exit) 694 .map_err(Error::Epoll)?; 695 696 epoll 697 .add_event(&reset_evt, EpollDispatch::Reset) 698 .map_err(Error::Epoll)?; 699 700 epoll 701 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 702 .map_err(Error::Epoll)?; 703 704 epoll 705 .add_event(&api_evt, EpollDispatch::Api) 706 .map_err(Error::Epoll)?; 707 708 #[cfg(feature = "guest_debug")] 709 epoll 710 .add_event(&debug_evt, EpollDispatch::Debug) 711 .map_err(Error::Epoll)?; 712 713 Ok(Vmm { 714 epoll, 715 exit_evt, 716 reset_evt, 717 api_evt, 718 #[cfg(feature = "guest_debug")] 719 debug_evt, 720 #[cfg(feature = "guest_debug")] 721 vm_debug_evt, 722 version: vmm_version, 723 vm: None, 724 vm_config: None, 725 seccomp_action, 726 hypervisor, 727 activate_evt, 728 signals: None, 729 threads: vec![], 730 original_termios_opt: Arc::new(Mutex::new(None)), 731 console_resize_pipe: None, 732 console_info: None, 733 }) 734 } 735 736 fn vm_receive_config<T>( 737 &mut self, 738 req: &Request, 739 socket: &mut T, 740 existing_memory_files: Option<HashMap<u32, File>>, 741 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 742 where 743 T: Read + Write, 744 { 745 // Read in config data along with memory manager data 746 let mut data: Vec<u8> = Vec::new(); 747 data.resize_with(req.length() as usize, Default::default); 748 socket 749 .read_exact(&mut data) 750 .map_err(MigratableError::MigrateSocket)?; 751 752 let vm_migration_config: VmMigrationConfig = 753 serde_json::from_slice(&data).map_err(|e| { 754 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 755 })?; 756 757 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 758 self.vm_check_cpuid_compatibility( 759 &vm_migration_config.vm_config, 760 &vm_migration_config.common_cpuid, 761 )?; 762 763 let config = vm_migration_config.vm_config.clone(); 764 self.vm_config = Some(vm_migration_config.vm_config); 765 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 766 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 767 })?); 768 769 if self 770 .vm_config 771 .as_ref() 772 .unwrap() 773 .lock() 774 .unwrap() 775 .landlock_enable 776 { 777 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 778 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 779 })?; 780 } 781 782 let vm = Vm::create_hypervisor_vm( 783 &self.hypervisor, 784 #[cfg(feature = "tdx")] 785 false, 786 #[cfg(feature = "sev_snp")] 787 false, 788 ) 789 .map_err(|e| { 790 MigratableError::MigrateReceive(anyhow!( 791 "Error creating hypervisor VM from snapshot: {:?}", 792 e 793 )) 794 })?; 795 796 let phys_bits = 797 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 798 799 let memory_manager = MemoryManager::new( 800 vm, 801 &config.lock().unwrap().memory.clone(), 802 None, 803 phys_bits, 804 #[cfg(feature = "tdx")] 805 false, 806 Some(&vm_migration_config.memory_manager_data), 807 existing_memory_files, 808 #[cfg(target_arch = "x86_64")] 809 None, 810 ) 811 .map_err(|e| { 812 MigratableError::MigrateReceive(anyhow!( 813 "Error creating MemoryManager from snapshot: {:?}", 814 e 815 )) 816 })?; 817 818 Response::ok().write_to(socket)?; 819 820 Ok(memory_manager) 821 } 822 823 fn vm_receive_state<T>( 824 &mut self, 825 req: &Request, 826 socket: &mut T, 827 mm: Arc<Mutex<MemoryManager>>, 828 ) -> std::result::Result<(), MigratableError> 829 where 830 T: Read + Write, 831 { 832 // Read in state data 833 let mut data: Vec<u8> = Vec::new(); 834 data.resize_with(req.length() as usize, Default::default); 835 socket 836 .read_exact(&mut data) 837 .map_err(MigratableError::MigrateSocket)?; 838 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 839 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 840 })?; 841 842 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 843 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 844 })?; 845 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 846 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 847 })?; 848 #[cfg(feature = "guest_debug")] 849 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 850 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 851 })?; 852 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 853 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 854 })?; 855 856 let timestamp = Instant::now(); 857 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 858 let mut vm = Vm::new_from_memory_manager( 859 self.vm_config.clone().unwrap(), 860 mm, 861 hypervisor_vm, 862 exit_evt, 863 reset_evt, 864 #[cfg(feature = "guest_debug")] 865 debug_evt, 866 &self.seccomp_action, 867 self.hypervisor.clone(), 868 activate_evt, 869 timestamp, 870 self.console_info.clone(), 871 self.console_resize_pipe.as_ref().map(Arc::clone), 872 Arc::clone(&self.original_termios_opt), 873 Some(snapshot), 874 ) 875 .map_err(|e| { 876 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 877 })?; 878 879 // Create VM 880 vm.restore().map_err(|e| { 881 Response::error().write_to(socket).ok(); 882 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 883 })?; 884 self.vm = Some(vm); 885 886 Response::ok().write_to(socket)?; 887 888 Ok(()) 889 } 890 891 fn vm_receive_memory<T>( 892 &mut self, 893 req: &Request, 894 socket: &mut T, 895 memory_manager: &mut MemoryManager, 896 ) -> std::result::Result<(), MigratableError> 897 where 898 T: Read + ReadVolatile + Write, 899 { 900 // Read table 901 let table = MemoryRangeTable::read_from(socket, req.length())?; 902 903 // And then read the memory itself 904 memory_manager 905 .receive_memory_regions(&table, socket) 906 .inspect_err(|_| { 907 Response::error().write_to(socket).ok(); 908 })?; 909 Response::ok().write_to(socket)?; 910 Ok(()) 911 } 912 913 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 914 url.strip_prefix("unix:") 915 .ok_or_else(|| { 916 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 917 }) 918 .map(|s| s.into()) 919 } 920 921 // Returns true if there were dirty pages to send 922 fn vm_maybe_send_dirty_pages<T>( 923 vm: &mut Vm, 924 socket: &mut T, 925 ) -> result::Result<bool, MigratableError> 926 where 927 T: Read + Write + WriteVolatile, 928 { 929 // Send (dirty) memory table 930 let table = vm.dirty_log()?; 931 932 // But if there are no regions go straight to pause 933 if table.regions().is_empty() { 934 return Ok(false); 935 } 936 937 Request::memory(table.length()).write_to(socket).unwrap(); 938 table.write_to(socket)?; 939 // And then the memory itself 940 vm.send_memory_regions(&table, socket)?; 941 Response::read_from(socket)?.ok_or_abandon( 942 socket, 943 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 944 )?; 945 946 Ok(true) 947 } 948 949 fn send_migration( 950 vm: &mut Vm, 951 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 952 dyn hypervisor::Hypervisor, 953 >, 954 send_data_migration: VmSendMigrationData, 955 ) -> result::Result<(), MigratableError> { 956 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 957 let mut socket = UnixStream::connect(path).map_err(|e| { 958 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 959 })?; 960 961 // Start the migration 962 Request::start().write_to(&mut socket)?; 963 Response::read_from(&mut socket)?.ok_or_abandon( 964 &mut socket, 965 MigratableError::MigrateSend(anyhow!("Error starting migration")), 966 )?; 967 968 // Send config 969 let vm_config = vm.get_config(); 970 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 971 let common_cpuid = { 972 #[cfg(feature = "tdx")] 973 if vm_config.lock().unwrap().is_tdx_enabled() { 974 return Err(MigratableError::MigrateSend(anyhow!( 975 "Live Migration is not supported when TDX is enabled" 976 ))); 977 }; 978 979 let amx = vm_config.lock().unwrap().cpus.features.amx; 980 let phys_bits = 981 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 982 arch::generate_common_cpuid( 983 &hypervisor, 984 &arch::CpuidConfig { 985 sgx_epc_sections: None, 986 phys_bits, 987 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 988 #[cfg(feature = "tdx")] 989 tdx: false, 990 amx, 991 }, 992 ) 993 .map_err(|e| { 994 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 995 })? 996 }; 997 998 if send_data_migration.local { 999 vm.send_memory_fds(&mut socket)?; 1000 } 1001 1002 let vm_migration_config = VmMigrationConfig { 1003 vm_config, 1004 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1005 common_cpuid, 1006 memory_manager_data: vm.memory_manager_data(), 1007 }; 1008 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1009 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1010 socket 1011 .write_all(&config_data) 1012 .map_err(MigratableError::MigrateSocket)?; 1013 Response::read_from(&mut socket)?.ok_or_abandon( 1014 &mut socket, 1015 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1016 )?; 1017 1018 // Let every Migratable object know about the migration being started. 1019 vm.start_migration()?; 1020 1021 if send_data_migration.local { 1022 // Now pause VM 1023 vm.pause()?; 1024 } else { 1025 // Start logging dirty pages 1026 vm.start_dirty_log()?; 1027 1028 // Send memory table 1029 let table = vm.memory_range_table()?; 1030 Request::memory(table.length()) 1031 .write_to(&mut socket) 1032 .unwrap(); 1033 table.write_to(&mut socket)?; 1034 // And then the memory itself 1035 vm.send_memory_regions(&table, &mut socket)?; 1036 Response::read_from(&mut socket)?.ok_or_abandon( 1037 &mut socket, 1038 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1039 )?; 1040 1041 // Try at most 5 passes of dirty memory sending 1042 const MAX_DIRTY_MIGRATIONS: usize = 5; 1043 for i in 0..MAX_DIRTY_MIGRATIONS { 1044 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1045 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1046 break; 1047 } 1048 } 1049 1050 // Now pause VM 1051 vm.pause()?; 1052 1053 // Send last batch of dirty pages 1054 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1055 1056 // Stop logging dirty pages 1057 vm.stop_dirty_log()?; 1058 } 1059 // Capture snapshot and send it 1060 let vm_snapshot = vm.snapshot()?; 1061 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1062 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1063 socket 1064 .write_all(&snapshot_data) 1065 .map_err(MigratableError::MigrateSocket)?; 1066 Response::read_from(&mut socket)?.ok_or_abandon( 1067 &mut socket, 1068 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1069 )?; 1070 // Complete the migration 1071 Request::complete().write_to(&mut socket)?; 1072 Response::read_from(&mut socket)?.ok_or_abandon( 1073 &mut socket, 1074 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1075 )?; 1076 1077 info!("Migration complete"); 1078 1079 // Let every Migratable object know about the migration being complete 1080 vm.complete_migration() 1081 } 1082 1083 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1084 fn vm_check_cpuid_compatibility( 1085 &self, 1086 src_vm_config: &Arc<Mutex<VmConfig>>, 1087 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1088 ) -> result::Result<(), MigratableError> { 1089 #[cfg(feature = "tdx")] 1090 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1091 return Err(MigratableError::MigrateReceive(anyhow!( 1092 "Live Migration is not supported when TDX is enabled" 1093 ))); 1094 }; 1095 1096 // We check the `CPUID` compatibility of between the source vm and destination, which is 1097 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1098 let dest_cpuid = &{ 1099 let vm_config = &src_vm_config.lock().unwrap(); 1100 1101 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1102 arch::generate_common_cpuid( 1103 &self.hypervisor.clone(), 1104 &arch::CpuidConfig { 1105 sgx_epc_sections: None, 1106 phys_bits, 1107 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1108 #[cfg(feature = "tdx")] 1109 tdx: false, 1110 amx: vm_config.cpus.features.amx, 1111 }, 1112 ) 1113 .map_err(|e| { 1114 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1115 })? 1116 }; 1117 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1118 MigratableError::MigrateReceive(anyhow!( 1119 "Error checking cpu feature compatibility': {:?}", 1120 e 1121 )) 1122 }) 1123 } 1124 1125 fn control_loop( 1126 &mut self, 1127 api_receiver: Rc<Receiver<ApiRequest>>, 1128 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1129 ) -> Result<()> { 1130 const EPOLL_EVENTS_LEN: usize = 100; 1131 1132 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1133 let epoll_fd = self.epoll.as_raw_fd(); 1134 1135 'outer: loop { 1136 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1137 Ok(res) => res, 1138 Err(e) => { 1139 if e.kind() == io::ErrorKind::Interrupted { 1140 // It's well defined from the epoll_wait() syscall 1141 // documentation that the epoll loop can be interrupted 1142 // before any of the requested events occurred or the 1143 // timeout expired. In both those cases, epoll_wait() 1144 // returns an error of type EINTR, but this should not 1145 // be considered as a regular error. Instead it is more 1146 // appropriate to retry, by calling into epoll_wait(). 1147 continue; 1148 } 1149 return Err(Error::Epoll(e)); 1150 } 1151 }; 1152 1153 for event in events.iter().take(num_events) { 1154 let dispatch_event: EpollDispatch = event.data.into(); 1155 match dispatch_event { 1156 EpollDispatch::Unknown => { 1157 let event = event.data; 1158 warn!("Unknown VMM loop event: {}", event); 1159 } 1160 EpollDispatch::Exit => { 1161 info!("VM exit event"); 1162 // Consume the event. 1163 self.exit_evt.read().map_err(Error::EventFdRead)?; 1164 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1165 1166 break 'outer; 1167 } 1168 EpollDispatch::Reset => { 1169 info!("VM reset event"); 1170 // Consume the event. 1171 self.reset_evt.read().map_err(Error::EventFdRead)?; 1172 self.vm_reboot().map_err(Error::VmReboot)?; 1173 } 1174 EpollDispatch::ActivateVirtioDevices => { 1175 if let Some(ref vm) = self.vm { 1176 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1177 info!( 1178 "Trying to activate pending virtio devices: count = {}", 1179 count 1180 ); 1181 vm.activate_virtio_devices() 1182 .map_err(Error::ActivateVirtioDevices)?; 1183 } 1184 } 1185 EpollDispatch::Api => { 1186 // Consume the events. 1187 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1188 // Read from the API receiver channel 1189 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1190 1191 if api_request(self)? { 1192 break 'outer; 1193 } 1194 } 1195 } 1196 #[cfg(feature = "guest_debug")] 1197 EpollDispatch::Debug => { 1198 // Consume the events. 1199 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1200 // Read from the API receiver channel 1201 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1202 1203 let response = if let Some(ref mut vm) = self.vm { 1204 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1205 } else { 1206 Err(VmError::VmNotRunning) 1207 } 1208 .map_err(gdb::Error::Vm); 1209 1210 gdb_request 1211 .sender 1212 .send(response) 1213 .map_err(Error::GdbResponseSend)?; 1214 } 1215 } 1216 #[cfg(not(feature = "guest_debug"))] 1217 EpollDispatch::Debug => {} 1218 } 1219 } 1220 } 1221 1222 // Trigger the termination of the signal_handler thread 1223 if let Some(signals) = self.signals.take() { 1224 signals.close(); 1225 } 1226 1227 // Wait for all the threads to finish 1228 for thread in self.threads.drain(..) { 1229 thread.join().map_err(Error::ThreadCleanup)? 1230 } 1231 1232 Ok(()) 1233 } 1234 } 1235 1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1237 vm_config.lock().unwrap().apply_landlock()?; 1238 Ok(()) 1239 } 1240 1241 impl RequestHandler for Vmm { 1242 fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> { 1243 // We only store the passed VM config. 1244 // The VM will be created when being asked to boot it. 1245 if self.vm_config.is_none() { 1246 self.vm_config = Some(config); 1247 self.console_info = 1248 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1249 1250 if self 1251 .vm_config 1252 .as_ref() 1253 .unwrap() 1254 .lock() 1255 .unwrap() 1256 .landlock_enable 1257 { 1258 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1259 .map_err(VmError::ApplyLandlock)?; 1260 } 1261 Ok(()) 1262 } else { 1263 Err(VmError::VmAlreadyCreated) 1264 } 1265 } 1266 1267 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1268 tracer::start(); 1269 info!("Booting VM"); 1270 event!("vm", "booting"); 1271 let r = { 1272 trace_scoped!("vm_boot"); 1273 // If we don't have a config, we cannot boot a VM. 1274 if self.vm_config.is_none() { 1275 return Err(VmError::VmMissingConfig); 1276 }; 1277 1278 // console_info is set to None in vm_shutdown. re-populate here if empty 1279 if self.console_info.is_none() { 1280 self.console_info = 1281 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1282 } 1283 1284 // Create a new VM if we don't have one yet. 1285 if self.vm.is_none() { 1286 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1287 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1288 #[cfg(feature = "guest_debug")] 1289 let vm_debug_evt = self 1290 .vm_debug_evt 1291 .try_clone() 1292 .map_err(VmError::EventFdClone)?; 1293 let activate_evt = self 1294 .activate_evt 1295 .try_clone() 1296 .map_err(VmError::EventFdClone)?; 1297 1298 if let Some(ref vm_config) = self.vm_config { 1299 let vm = Vm::new( 1300 Arc::clone(vm_config), 1301 exit_evt, 1302 reset_evt, 1303 #[cfg(feature = "guest_debug")] 1304 vm_debug_evt, 1305 &self.seccomp_action, 1306 self.hypervisor.clone(), 1307 activate_evt, 1308 self.console_info.clone(), 1309 self.console_resize_pipe.as_ref().map(Arc::clone), 1310 Arc::clone(&self.original_termios_opt), 1311 None, 1312 None, 1313 None, 1314 )?; 1315 1316 self.vm = Some(vm); 1317 } 1318 } 1319 1320 // Now we can boot the VM. 1321 if let Some(ref mut vm) = self.vm { 1322 vm.boot() 1323 } else { 1324 Err(VmError::VmNotCreated) 1325 } 1326 }; 1327 tracer::end(); 1328 if r.is_ok() { 1329 event!("vm", "booted"); 1330 } 1331 r 1332 } 1333 1334 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1335 if let Some(ref mut vm) = self.vm { 1336 vm.pause().map_err(VmError::Pause) 1337 } else { 1338 Err(VmError::VmNotRunning) 1339 } 1340 } 1341 1342 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1343 if let Some(ref mut vm) = self.vm { 1344 vm.resume().map_err(VmError::Resume) 1345 } else { 1346 Err(VmError::VmNotRunning) 1347 } 1348 } 1349 1350 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1351 if let Some(ref mut vm) = self.vm { 1352 // Drain console_info so that FDs are not reused 1353 let _ = self.console_info.take(); 1354 vm.snapshot() 1355 .map_err(VmError::Snapshot) 1356 .and_then(|snapshot| { 1357 vm.send(&snapshot, destination_url) 1358 .map_err(VmError::SnapshotSend) 1359 }) 1360 } else { 1361 Err(VmError::VmNotRunning) 1362 } 1363 } 1364 1365 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1366 if self.vm.is_some() || self.vm_config.is_some() { 1367 return Err(VmError::VmAlreadyCreated); 1368 } 1369 1370 let source_url = restore_cfg.source_url.as_path().to_str(); 1371 if source_url.is_none() { 1372 return Err(VmError::InvalidRestoreSourceUrl); 1373 } 1374 // Safe to unwrap as we checked it was Some(&str). 1375 let source_url = source_url.unwrap(); 1376 1377 let vm_config = Arc::new(Mutex::new( 1378 recv_vm_config(source_url).map_err(VmError::Restore)?, 1379 )); 1380 restore_cfg 1381 .validate(&vm_config.lock().unwrap().clone()) 1382 .map_err(VmError::ConfigValidation)?; 1383 1384 // Update VM's net configurations with new fds received for restore operation 1385 if let (Some(restored_nets), Some(vm_net_configs)) = 1386 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1387 { 1388 for net in restored_nets.iter() { 1389 for net_config in vm_net_configs.iter_mut() { 1390 // update only if the net dev is backed by FDs 1391 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1392 net_config.fds.clone_from(&net.fds); 1393 } 1394 } 1395 } 1396 } 1397 1398 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1399 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1400 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1401 1402 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1403 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1404 .map_err(VmError::Restore)?; 1405 1406 self.vm_config = Some(Arc::clone(&vm_config)); 1407 1408 // console_info is set to None in vm_snapshot. re-populate here if empty 1409 if self.console_info.is_none() { 1410 self.console_info = 1411 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1412 } 1413 1414 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1415 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1416 #[cfg(feature = "guest_debug")] 1417 let debug_evt = self 1418 .vm_debug_evt 1419 .try_clone() 1420 .map_err(VmError::EventFdClone)?; 1421 let activate_evt = self 1422 .activate_evt 1423 .try_clone() 1424 .map_err(VmError::EventFdClone)?; 1425 1426 let vm = Vm::new( 1427 vm_config, 1428 exit_evt, 1429 reset_evt, 1430 #[cfg(feature = "guest_debug")] 1431 debug_evt, 1432 &self.seccomp_action, 1433 self.hypervisor.clone(), 1434 activate_evt, 1435 self.console_info.clone(), 1436 self.console_resize_pipe.as_ref().map(Arc::clone), 1437 Arc::clone(&self.original_termios_opt), 1438 Some(snapshot), 1439 Some(source_url), 1440 Some(restore_cfg.prefault), 1441 )?; 1442 self.vm = Some(vm); 1443 1444 if self 1445 .vm_config 1446 .as_ref() 1447 .unwrap() 1448 .lock() 1449 .unwrap() 1450 .landlock_enable 1451 { 1452 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1453 .map_err(VmError::ApplyLandlock)?; 1454 } 1455 1456 // Now we can restore the rest of the VM. 1457 if let Some(ref mut vm) = self.vm { 1458 vm.restore() 1459 } else { 1460 Err(VmError::VmNotCreated) 1461 } 1462 } 1463 1464 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1465 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1466 if let Some(ref mut vm) = self.vm { 1467 vm.coredump(destination_url).map_err(VmError::Coredump) 1468 } else { 1469 Err(VmError::VmNotRunning) 1470 } 1471 } 1472 1473 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1474 let r = if let Some(ref mut vm) = self.vm.take() { 1475 // Drain console_info so that the FDs are not reused 1476 let _ = self.console_info.take(); 1477 vm.shutdown() 1478 } else { 1479 Err(VmError::VmNotRunning) 1480 }; 1481 1482 if r.is_ok() { 1483 event!("vm", "shutdown"); 1484 } 1485 1486 r 1487 } 1488 1489 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1490 event!("vm", "rebooting"); 1491 1492 // First we stop the current VM 1493 let (config, console_resize_pipe) = if let Some(mut vm) = self.vm.take() { 1494 let config = vm.get_config(); 1495 let console_resize_pipe = vm.console_resize_pipe(); 1496 vm.shutdown()?; 1497 (config, console_resize_pipe) 1498 } else { 1499 return Err(VmError::VmNotCreated); 1500 }; 1501 1502 // vm.shutdown() closes all the console devices, so set console_info to None 1503 // so that the closed FD #s are not reused. 1504 let _ = self.console_info.take(); 1505 1506 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1507 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1508 #[cfg(feature = "guest_debug")] 1509 let debug_evt = self 1510 .vm_debug_evt 1511 .try_clone() 1512 .map_err(VmError::EventFdClone)?; 1513 let activate_evt = self 1514 .activate_evt 1515 .try_clone() 1516 .map_err(VmError::EventFdClone)?; 1517 1518 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1519 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1520 // during the boot process. 1521 if self.reset_evt.read().is_ok() { 1522 warn!("Spurious second reset event received. Ignoring."); 1523 } 1524 1525 self.console_info = 1526 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1527 1528 // Then we create the new VM 1529 let mut vm = Vm::new( 1530 config, 1531 exit_evt, 1532 reset_evt, 1533 #[cfg(feature = "guest_debug")] 1534 debug_evt, 1535 &self.seccomp_action, 1536 self.hypervisor.clone(), 1537 activate_evt, 1538 self.console_info.clone(), 1539 console_resize_pipe, 1540 Arc::clone(&self.original_termios_opt), 1541 None, 1542 None, 1543 None, 1544 )?; 1545 1546 // And we boot it 1547 vm.boot()?; 1548 1549 self.vm = Some(vm); 1550 1551 event!("vm", "rebooted"); 1552 1553 Ok(()) 1554 } 1555 1556 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1557 match &self.vm_config { 1558 Some(config) => { 1559 let state = match &self.vm { 1560 Some(vm) => vm.get_state()?, 1561 None => VmState::Created, 1562 }; 1563 1564 let config = Arc::clone(config); 1565 1566 let mut memory_actual_size = config.lock().unwrap().memory.total_size(); 1567 if let Some(vm) = &self.vm { 1568 memory_actual_size -= vm.balloon_size(); 1569 } 1570 1571 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree()); 1572 1573 Ok(VmInfoResponse { 1574 config, 1575 state, 1576 memory_actual_size, 1577 device_tree, 1578 }) 1579 } 1580 None => Err(VmError::VmNotCreated), 1581 } 1582 } 1583 1584 fn vmm_ping(&self) -> VmmPingResponse { 1585 let VmmVersionInfo { 1586 build_version, 1587 version, 1588 } = self.version.clone(); 1589 1590 VmmPingResponse { 1591 build_version, 1592 version, 1593 pid: std::process::id() as i64, 1594 features: feature_list(), 1595 } 1596 } 1597 1598 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1599 if self.vm_config.is_none() { 1600 return Ok(()); 1601 } 1602 1603 // If a VM is booted, we first try to shut it down. 1604 if self.vm.is_some() { 1605 self.vm_shutdown()?; 1606 } 1607 1608 self.vm_config = None; 1609 1610 event!("vm", "deleted"); 1611 1612 Ok(()) 1613 } 1614 1615 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1616 self.vm_delete()?; 1617 event!("vmm", "shutdown"); 1618 Ok(()) 1619 } 1620 1621 fn vm_resize( 1622 &mut self, 1623 desired_vcpus: Option<u8>, 1624 desired_ram: Option<u64>, 1625 desired_balloon: Option<u64>, 1626 ) -> result::Result<(), VmError> { 1627 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1628 1629 if let Some(ref mut vm) = self.vm { 1630 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1631 error!("Error when resizing VM: {:?}", e); 1632 Err(e) 1633 } else { 1634 Ok(()) 1635 } 1636 } else { 1637 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1638 if let Some(desired_vcpus) = desired_vcpus { 1639 config.cpus.boot_vcpus = desired_vcpus; 1640 } 1641 if let Some(desired_ram) = desired_ram { 1642 config.memory.size = desired_ram; 1643 } 1644 if let Some(desired_balloon) = desired_balloon { 1645 if let Some(balloon_config) = &mut config.balloon { 1646 balloon_config.size = desired_balloon; 1647 } 1648 } 1649 Ok(()) 1650 } 1651 } 1652 1653 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1654 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1655 1656 if let Some(ref mut vm) = self.vm { 1657 if let Err(e) = vm.resize_zone(id, desired_ram) { 1658 error!("Error when resizing VM: {:?}", e); 1659 Err(e) 1660 } else { 1661 Ok(()) 1662 } 1663 } else { 1664 // Update VmConfig by setting the new desired ram. 1665 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1666 1667 if let Some(zones) = &mut memory_config.zones { 1668 for zone in zones.iter_mut() { 1669 if zone.id == id { 1670 zone.size = desired_ram; 1671 return Ok(()); 1672 } 1673 } 1674 } 1675 1676 error!("Could not find the memory zone {} for the resize", id); 1677 Err(VmError::ResizeZone) 1678 } 1679 } 1680 1681 fn vm_add_device( 1682 &mut self, 1683 device_cfg: DeviceConfig, 1684 ) -> result::Result<Option<Vec<u8>>, VmError> { 1685 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1686 1687 { 1688 // Validate the configuration change in a cloned configuration 1689 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1690 add_to_config(&mut config.devices, device_cfg.clone()); 1691 config.validate().map_err(VmError::ConfigValidation)?; 1692 } 1693 1694 if let Some(ref mut vm) = self.vm { 1695 let info = vm.add_device(device_cfg).map_err(|e| { 1696 error!("Error when adding new device to the VM: {:?}", e); 1697 e 1698 })?; 1699 serde_json::to_vec(&info) 1700 .map(Some) 1701 .map_err(VmError::SerializeJson) 1702 } else { 1703 // Update VmConfig by adding the new device. 1704 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1705 add_to_config(&mut config.devices, device_cfg); 1706 Ok(None) 1707 } 1708 } 1709 1710 fn vm_add_user_device( 1711 &mut self, 1712 device_cfg: UserDeviceConfig, 1713 ) -> result::Result<Option<Vec<u8>>, VmError> { 1714 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1715 1716 { 1717 // Validate the configuration change in a cloned configuration 1718 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1719 add_to_config(&mut config.user_devices, device_cfg.clone()); 1720 config.validate().map_err(VmError::ConfigValidation)?; 1721 } 1722 1723 if let Some(ref mut vm) = self.vm { 1724 let info = vm.add_user_device(device_cfg).map_err(|e| { 1725 error!("Error when adding new user device to the VM: {:?}", e); 1726 e 1727 })?; 1728 serde_json::to_vec(&info) 1729 .map(Some) 1730 .map_err(VmError::SerializeJson) 1731 } else { 1732 // Update VmConfig by adding the new device. 1733 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1734 add_to_config(&mut config.user_devices, device_cfg); 1735 Ok(None) 1736 } 1737 } 1738 1739 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1740 if let Some(ref mut vm) = self.vm { 1741 if let Err(e) = vm.remove_device(id) { 1742 error!("Error when removing device from the VM: {:?}", e); 1743 Err(e) 1744 } else { 1745 Ok(()) 1746 } 1747 } else if let Some(ref config) = self.vm_config { 1748 let mut config = config.lock().unwrap(); 1749 if config.remove_device(&id) { 1750 Ok(()) 1751 } else { 1752 Err(VmError::NoDeviceToRemove(id)) 1753 } 1754 } else { 1755 Err(VmError::VmNotCreated) 1756 } 1757 } 1758 1759 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1760 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1761 1762 { 1763 // Validate the configuration change in a cloned configuration 1764 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1765 add_to_config(&mut config.disks, disk_cfg.clone()); 1766 config.validate().map_err(VmError::ConfigValidation)?; 1767 } 1768 1769 if let Some(ref mut vm) = self.vm { 1770 let info = vm.add_disk(disk_cfg).map_err(|e| { 1771 error!("Error when adding new disk to the VM: {:?}", e); 1772 e 1773 })?; 1774 serde_json::to_vec(&info) 1775 .map(Some) 1776 .map_err(VmError::SerializeJson) 1777 } else { 1778 // Update VmConfig by adding the new device. 1779 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1780 add_to_config(&mut config.disks, disk_cfg); 1781 Ok(None) 1782 } 1783 } 1784 1785 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1786 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1787 1788 { 1789 // Validate the configuration change in a cloned configuration 1790 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1791 add_to_config(&mut config.fs, fs_cfg.clone()); 1792 config.validate().map_err(VmError::ConfigValidation)?; 1793 } 1794 1795 if let Some(ref mut vm) = self.vm { 1796 let info = vm.add_fs(fs_cfg).map_err(|e| { 1797 error!("Error when adding new fs to the VM: {:?}", e); 1798 e 1799 })?; 1800 serde_json::to_vec(&info) 1801 .map(Some) 1802 .map_err(VmError::SerializeJson) 1803 } else { 1804 // Update VmConfig by adding the new device. 1805 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1806 add_to_config(&mut config.fs, fs_cfg); 1807 Ok(None) 1808 } 1809 } 1810 1811 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1812 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1813 1814 { 1815 // Validate the configuration change in a cloned configuration 1816 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1817 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1818 config.validate().map_err(VmError::ConfigValidation)?; 1819 } 1820 1821 if let Some(ref mut vm) = self.vm { 1822 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1823 error!("Error when adding new pmem device to the VM: {:?}", e); 1824 e 1825 })?; 1826 serde_json::to_vec(&info) 1827 .map(Some) 1828 .map_err(VmError::SerializeJson) 1829 } else { 1830 // Update VmConfig by adding the new device. 1831 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1832 add_to_config(&mut config.pmem, pmem_cfg); 1833 Ok(None) 1834 } 1835 } 1836 1837 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1838 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1839 1840 { 1841 // Validate the configuration change in a cloned configuration 1842 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1843 add_to_config(&mut config.net, net_cfg.clone()); 1844 config.validate().map_err(VmError::ConfigValidation)?; 1845 } 1846 1847 if let Some(ref mut vm) = self.vm { 1848 let info = vm.add_net(net_cfg).map_err(|e| { 1849 error!("Error when adding new network device to the VM: {:?}", e); 1850 e 1851 })?; 1852 serde_json::to_vec(&info) 1853 .map(Some) 1854 .map_err(VmError::SerializeJson) 1855 } else { 1856 // Update VmConfig by adding the new device. 1857 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1858 add_to_config(&mut config.net, net_cfg); 1859 Ok(None) 1860 } 1861 } 1862 1863 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1864 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1865 1866 { 1867 // Validate the configuration change in a cloned configuration 1868 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1869 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1870 config.validate().map_err(VmError::ConfigValidation)?; 1871 } 1872 1873 if let Some(ref mut vm) = self.vm { 1874 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1875 error!("Error when adding new vDPA device to the VM: {:?}", e); 1876 e 1877 })?; 1878 serde_json::to_vec(&info) 1879 .map(Some) 1880 .map_err(VmError::SerializeJson) 1881 } else { 1882 // Update VmConfig by adding the new device. 1883 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1884 add_to_config(&mut config.vdpa, vdpa_cfg); 1885 Ok(None) 1886 } 1887 } 1888 1889 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1890 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1891 1892 { 1893 // Validate the configuration change in a cloned configuration 1894 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1895 1896 if config.vsock.is_some() { 1897 return Err(VmError::TooManyVsockDevices); 1898 } 1899 1900 config.vsock = Some(vsock_cfg.clone()); 1901 config.validate().map_err(VmError::ConfigValidation)?; 1902 } 1903 1904 if let Some(ref mut vm) = self.vm { 1905 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1906 error!("Error when adding new vsock device to the VM: {:?}", e); 1907 e 1908 })?; 1909 serde_json::to_vec(&info) 1910 .map(Some) 1911 .map_err(VmError::SerializeJson) 1912 } else { 1913 // Update VmConfig by adding the new device. 1914 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1915 config.vsock = Some(vsock_cfg); 1916 Ok(None) 1917 } 1918 } 1919 1920 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1921 if let Some(ref mut vm) = self.vm { 1922 let info = vm.counters().map_err(|e| { 1923 error!("Error when getting counters from the VM: {:?}", e); 1924 e 1925 })?; 1926 serde_json::to_vec(&info) 1927 .map(Some) 1928 .map_err(VmError::SerializeJson) 1929 } else { 1930 Err(VmError::VmNotRunning) 1931 } 1932 } 1933 1934 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1935 if let Some(ref mut vm) = self.vm { 1936 vm.power_button() 1937 } else { 1938 Err(VmError::VmNotRunning) 1939 } 1940 } 1941 1942 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 1943 if let Some(ref mut vm) = self.vm { 1944 vm.nmi() 1945 } else { 1946 Err(VmError::VmNotRunning) 1947 } 1948 } 1949 1950 fn vm_receive_migration( 1951 &mut self, 1952 receive_data_migration: VmReceiveMigrationData, 1953 ) -> result::Result<(), MigratableError> { 1954 info!( 1955 "Receiving migration: receiver_url = {}", 1956 receive_data_migration.receiver_url 1957 ); 1958 1959 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1960 let listener = UnixListener::bind(&path).map_err(|e| { 1961 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1962 })?; 1963 let (mut socket, _addr) = listener.accept().map_err(|e| { 1964 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1965 })?; 1966 std::fs::remove_file(&path).map_err(|e| { 1967 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1968 })?; 1969 1970 let mut started = false; 1971 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1972 let mut existing_memory_files = None; 1973 loop { 1974 let req = Request::read_from(&mut socket)?; 1975 match req.command() { 1976 Command::Invalid => info!("Invalid Command Received"), 1977 Command::Start => { 1978 info!("Start Command Received"); 1979 started = true; 1980 1981 Response::ok().write_to(&mut socket)?; 1982 } 1983 Command::Config => { 1984 info!("Config Command Received"); 1985 1986 if !started { 1987 warn!("Migration not started yet"); 1988 Response::error().write_to(&mut socket)?; 1989 continue; 1990 } 1991 memory_manager = Some(self.vm_receive_config( 1992 &req, 1993 &mut socket, 1994 existing_memory_files.take(), 1995 )?); 1996 } 1997 Command::State => { 1998 info!("State Command Received"); 1999 2000 if !started { 2001 warn!("Migration not started yet"); 2002 Response::error().write_to(&mut socket)?; 2003 continue; 2004 } 2005 if let Some(mm) = memory_manager.take() { 2006 self.vm_receive_state(&req, &mut socket, mm)?; 2007 } else { 2008 warn!("Configuration not sent yet"); 2009 Response::error().write_to(&mut socket)?; 2010 } 2011 } 2012 Command::Memory => { 2013 info!("Memory Command Received"); 2014 2015 if !started { 2016 warn!("Migration not started yet"); 2017 Response::error().write_to(&mut socket)?; 2018 continue; 2019 } 2020 if let Some(mm) = memory_manager.as_ref() { 2021 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2022 } else { 2023 warn!("Configuration not sent yet"); 2024 Response::error().write_to(&mut socket)?; 2025 } 2026 } 2027 Command::MemoryFd => { 2028 info!("MemoryFd Command Received"); 2029 2030 if !started { 2031 warn!("Migration not started yet"); 2032 Response::error().write_to(&mut socket)?; 2033 continue; 2034 } 2035 2036 let mut buf = [0u8; 4]; 2037 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 2038 MigratableError::MigrateReceive(anyhow!( 2039 "Error receiving slot from socket: {}", 2040 e 2041 )) 2042 })?; 2043 2044 if existing_memory_files.is_none() { 2045 existing_memory_files = Some(HashMap::default()) 2046 } 2047 2048 if let Some(ref mut existing_memory_files) = existing_memory_files { 2049 let slot = u32::from_le_bytes(buf); 2050 existing_memory_files.insert(slot, file.unwrap()); 2051 } 2052 2053 Response::ok().write_to(&mut socket)?; 2054 } 2055 Command::Complete => { 2056 info!("Complete Command Received"); 2057 if let Some(ref mut vm) = self.vm.as_mut() { 2058 vm.resume()?; 2059 Response::ok().write_to(&mut socket)?; 2060 } else { 2061 warn!("VM not created yet"); 2062 Response::error().write_to(&mut socket)?; 2063 } 2064 break; 2065 } 2066 Command::Abandon => { 2067 info!("Abandon Command Received"); 2068 self.vm = None; 2069 self.vm_config = None; 2070 Response::ok().write_to(&mut socket).ok(); 2071 break; 2072 } 2073 } 2074 } 2075 2076 Ok(()) 2077 } 2078 2079 fn vm_send_migration( 2080 &mut self, 2081 send_data_migration: VmSendMigrationData, 2082 ) -> result::Result<(), MigratableError> { 2083 info!( 2084 "Sending migration: destination_url = {}, local = {}", 2085 send_data_migration.destination_url, send_data_migration.local 2086 ); 2087 2088 if !self 2089 .vm_config 2090 .as_ref() 2091 .unwrap() 2092 .lock() 2093 .unwrap() 2094 .backed_by_shared_memory() 2095 && send_data_migration.local 2096 { 2097 return Err(MigratableError::MigrateSend(anyhow!( 2098 "Local migration requires shared memory or hugepages enabled" 2099 ))); 2100 } 2101 2102 if let Some(vm) = self.vm.as_mut() { 2103 Self::send_migration( 2104 vm, 2105 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2106 self.hypervisor.clone(), 2107 send_data_migration, 2108 ) 2109 .map_err(|migration_err| { 2110 error!("Migration failed: {:?}", migration_err); 2111 2112 // Stop logging dirty pages 2113 if let Err(e) = vm.stop_dirty_log() { 2114 return e; 2115 } 2116 2117 if vm.get_state().unwrap() == VmState::Paused { 2118 if let Err(e) = vm.resume() { 2119 return e; 2120 } 2121 } 2122 2123 migration_err 2124 })?; 2125 2126 // Shutdown the VM after the migration succeeded 2127 self.exit_evt.write(1).map_err(|e| { 2128 MigratableError::MigrateSend(anyhow!( 2129 "Failed shutting down the VM after migration: {:?}", 2130 e 2131 )) 2132 }) 2133 } else { 2134 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2135 } 2136 } 2137 } 2138 2139 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2140 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2141 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2142 2143 #[cfg(test)] 2144 mod unit_tests { 2145 use super::*; 2146 #[cfg(target_arch = "x86_64")] 2147 use crate::config::DebugConsoleConfig; 2148 use config::{ 2149 ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig, 2150 RngConfig, 2151 }; 2152 2153 fn create_dummy_vmm() -> Vmm { 2154 Vmm::new( 2155 VmmVersionInfo::new("dummy", "dummy"), 2156 EventFd::new(EFD_NONBLOCK).unwrap(), 2157 #[cfg(feature = "guest_debug")] 2158 EventFd::new(EFD_NONBLOCK).unwrap(), 2159 #[cfg(feature = "guest_debug")] 2160 EventFd::new(EFD_NONBLOCK).unwrap(), 2161 SeccompAction::Allow, 2162 hypervisor::new().unwrap(), 2163 EventFd::new(EFD_NONBLOCK).unwrap(), 2164 ) 2165 .unwrap() 2166 } 2167 2168 fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> { 2169 Arc::new(Mutex::new(VmConfig { 2170 cpus: CpusConfig { 2171 boot_vcpus: 1, 2172 max_vcpus: 1, 2173 topology: None, 2174 kvm_hyperv: false, 2175 max_phys_bits: 46, 2176 affinity: None, 2177 features: config::CpuFeatures::default(), 2178 }, 2179 memory: MemoryConfig { 2180 size: 536_870_912, 2181 mergeable: false, 2182 hotplug_method: HotplugMethod::Acpi, 2183 hotplug_size: None, 2184 hotplugged_size: None, 2185 shared: true, 2186 hugepages: false, 2187 hugepage_size: None, 2188 prefault: false, 2189 zones: None, 2190 thp: true, 2191 }, 2192 payload: Some(PayloadConfig { 2193 kernel: Some(PathBuf::from("/path/to/kernel")), 2194 firmware: None, 2195 cmdline: None, 2196 initramfs: None, 2197 #[cfg(feature = "igvm")] 2198 igvm: None, 2199 #[cfg(feature = "sev_snp")] 2200 host_data: None, 2201 }), 2202 rate_limit_groups: None, 2203 disks: None, 2204 net: None, 2205 rng: RngConfig { 2206 src: PathBuf::from("/dev/urandom"), 2207 iommu: false, 2208 }, 2209 balloon: None, 2210 fs: None, 2211 pmem: None, 2212 serial: ConsoleConfig { 2213 file: None, 2214 mode: ConsoleOutputMode::Null, 2215 iommu: false, 2216 socket: None, 2217 }, 2218 console: ConsoleConfig { 2219 file: None, 2220 mode: ConsoleOutputMode::Tty, 2221 iommu: false, 2222 socket: None, 2223 }, 2224 #[cfg(target_arch = "x86_64")] 2225 debug_console: DebugConsoleConfig::default(), 2226 devices: None, 2227 user_devices: None, 2228 vdpa: None, 2229 vsock: None, 2230 #[cfg(feature = "pvmemcontrol")] 2231 pvmemcontrol: None, 2232 pvpanic: false, 2233 iommu: false, 2234 #[cfg(target_arch = "x86_64")] 2235 sgx_epc: None, 2236 numa: None, 2237 watchdog: false, 2238 #[cfg(feature = "guest_debug")] 2239 gdb: false, 2240 pci_segments: None, 2241 platform: None, 2242 tpm: None, 2243 preserved_fds: None, 2244 landlock_enable: false, 2245 landlock_rules: None, 2246 })) 2247 } 2248 2249 #[test] 2250 fn test_vmm_vm_create() { 2251 let mut vmm = create_dummy_vmm(); 2252 let config = create_dummy_vm_config(); 2253 2254 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2255 assert!(matches!( 2256 vmm.vm_create(config), 2257 Err(VmError::VmAlreadyCreated) 2258 )); 2259 } 2260 2261 #[test] 2262 fn test_vmm_vm_cold_add_device() { 2263 let mut vmm = create_dummy_vmm(); 2264 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2265 2266 assert!(matches!( 2267 vmm.vm_add_device(device_config.clone()), 2268 Err(VmError::VmNotCreated) 2269 )); 2270 2271 let _ = vmm.vm_create(create_dummy_vm_config()); 2272 assert!(vmm 2273 .vm_config 2274 .as_ref() 2275 .unwrap() 2276 .lock() 2277 .unwrap() 2278 .devices 2279 .is_none()); 2280 2281 let result = vmm.vm_add_device(device_config.clone()); 2282 assert!(result.is_ok()); 2283 assert!(result.unwrap().is_none()); 2284 assert_eq!( 2285 vmm.vm_config 2286 .as_ref() 2287 .unwrap() 2288 .lock() 2289 .unwrap() 2290 .devices 2291 .clone() 2292 .unwrap() 2293 .len(), 2294 1 2295 ); 2296 assert_eq!( 2297 vmm.vm_config 2298 .as_ref() 2299 .unwrap() 2300 .lock() 2301 .unwrap() 2302 .devices 2303 .clone() 2304 .unwrap()[0], 2305 device_config 2306 ); 2307 } 2308 2309 #[test] 2310 fn test_vmm_vm_cold_add_user_device() { 2311 let mut vmm = create_dummy_vmm(); 2312 let user_device_config = 2313 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2314 2315 assert!(matches!( 2316 vmm.vm_add_user_device(user_device_config.clone()), 2317 Err(VmError::VmNotCreated) 2318 )); 2319 2320 let _ = vmm.vm_create(create_dummy_vm_config()); 2321 assert!(vmm 2322 .vm_config 2323 .as_ref() 2324 .unwrap() 2325 .lock() 2326 .unwrap() 2327 .user_devices 2328 .is_none()); 2329 2330 let result = vmm.vm_add_user_device(user_device_config.clone()); 2331 assert!(result.is_ok()); 2332 assert!(result.unwrap().is_none()); 2333 assert_eq!( 2334 vmm.vm_config 2335 .as_ref() 2336 .unwrap() 2337 .lock() 2338 .unwrap() 2339 .user_devices 2340 .clone() 2341 .unwrap() 2342 .len(), 2343 1 2344 ); 2345 assert_eq!( 2346 vmm.vm_config 2347 .as_ref() 2348 .unwrap() 2349 .lock() 2350 .unwrap() 2351 .user_devices 2352 .clone() 2353 .unwrap()[0], 2354 user_device_config 2355 ); 2356 } 2357 2358 #[test] 2359 fn test_vmm_vm_cold_add_disk() { 2360 let mut vmm = create_dummy_vmm(); 2361 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2362 2363 assert!(matches!( 2364 vmm.vm_add_disk(disk_config.clone()), 2365 Err(VmError::VmNotCreated) 2366 )); 2367 2368 let _ = vmm.vm_create(create_dummy_vm_config()); 2369 assert!(vmm 2370 .vm_config 2371 .as_ref() 2372 .unwrap() 2373 .lock() 2374 .unwrap() 2375 .disks 2376 .is_none()); 2377 2378 let result = vmm.vm_add_disk(disk_config.clone()); 2379 assert!(result.is_ok()); 2380 assert!(result.unwrap().is_none()); 2381 assert_eq!( 2382 vmm.vm_config 2383 .as_ref() 2384 .unwrap() 2385 .lock() 2386 .unwrap() 2387 .disks 2388 .clone() 2389 .unwrap() 2390 .len(), 2391 1 2392 ); 2393 assert_eq!( 2394 vmm.vm_config 2395 .as_ref() 2396 .unwrap() 2397 .lock() 2398 .unwrap() 2399 .disks 2400 .clone() 2401 .unwrap()[0], 2402 disk_config 2403 ); 2404 } 2405 2406 #[test] 2407 fn test_vmm_vm_cold_add_fs() { 2408 let mut vmm = create_dummy_vmm(); 2409 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2410 2411 assert!(matches!( 2412 vmm.vm_add_fs(fs_config.clone()), 2413 Err(VmError::VmNotCreated) 2414 )); 2415 2416 let _ = vmm.vm_create(create_dummy_vm_config()); 2417 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2418 2419 let result = vmm.vm_add_fs(fs_config.clone()); 2420 assert!(result.is_ok()); 2421 assert!(result.unwrap().is_none()); 2422 assert_eq!( 2423 vmm.vm_config 2424 .as_ref() 2425 .unwrap() 2426 .lock() 2427 .unwrap() 2428 .fs 2429 .clone() 2430 .unwrap() 2431 .len(), 2432 1 2433 ); 2434 assert_eq!( 2435 vmm.vm_config 2436 .as_ref() 2437 .unwrap() 2438 .lock() 2439 .unwrap() 2440 .fs 2441 .clone() 2442 .unwrap()[0], 2443 fs_config 2444 ); 2445 } 2446 2447 #[test] 2448 fn test_vmm_vm_cold_add_pmem() { 2449 let mut vmm = create_dummy_vmm(); 2450 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2451 2452 assert!(matches!( 2453 vmm.vm_add_pmem(pmem_config.clone()), 2454 Err(VmError::VmNotCreated) 2455 )); 2456 2457 let _ = vmm.vm_create(create_dummy_vm_config()); 2458 assert!(vmm 2459 .vm_config 2460 .as_ref() 2461 .unwrap() 2462 .lock() 2463 .unwrap() 2464 .pmem 2465 .is_none()); 2466 2467 let result = vmm.vm_add_pmem(pmem_config.clone()); 2468 assert!(result.is_ok()); 2469 assert!(result.unwrap().is_none()); 2470 assert_eq!( 2471 vmm.vm_config 2472 .as_ref() 2473 .unwrap() 2474 .lock() 2475 .unwrap() 2476 .pmem 2477 .clone() 2478 .unwrap() 2479 .len(), 2480 1 2481 ); 2482 assert_eq!( 2483 vmm.vm_config 2484 .as_ref() 2485 .unwrap() 2486 .lock() 2487 .unwrap() 2488 .pmem 2489 .clone() 2490 .unwrap()[0], 2491 pmem_config 2492 ); 2493 } 2494 2495 #[test] 2496 fn test_vmm_vm_cold_add_net() { 2497 let mut vmm = create_dummy_vmm(); 2498 let net_config = NetConfig::parse( 2499 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2500 ) 2501 .unwrap(); 2502 2503 assert!(matches!( 2504 vmm.vm_add_net(net_config.clone()), 2505 Err(VmError::VmNotCreated) 2506 )); 2507 2508 let _ = vmm.vm_create(create_dummy_vm_config()); 2509 assert!(vmm 2510 .vm_config 2511 .as_ref() 2512 .unwrap() 2513 .lock() 2514 .unwrap() 2515 .net 2516 .is_none()); 2517 2518 let result = vmm.vm_add_net(net_config.clone()); 2519 assert!(result.is_ok()); 2520 assert!(result.unwrap().is_none()); 2521 assert_eq!( 2522 vmm.vm_config 2523 .as_ref() 2524 .unwrap() 2525 .lock() 2526 .unwrap() 2527 .net 2528 .clone() 2529 .unwrap() 2530 .len(), 2531 1 2532 ); 2533 assert_eq!( 2534 vmm.vm_config 2535 .as_ref() 2536 .unwrap() 2537 .lock() 2538 .unwrap() 2539 .net 2540 .clone() 2541 .unwrap()[0], 2542 net_config 2543 ); 2544 } 2545 2546 #[test] 2547 fn test_vmm_vm_cold_add_vdpa() { 2548 let mut vmm = create_dummy_vmm(); 2549 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2550 2551 assert!(matches!( 2552 vmm.vm_add_vdpa(vdpa_config.clone()), 2553 Err(VmError::VmNotCreated) 2554 )); 2555 2556 let _ = vmm.vm_create(create_dummy_vm_config()); 2557 assert!(vmm 2558 .vm_config 2559 .as_ref() 2560 .unwrap() 2561 .lock() 2562 .unwrap() 2563 .vdpa 2564 .is_none()); 2565 2566 let result = vmm.vm_add_vdpa(vdpa_config.clone()); 2567 assert!(result.is_ok()); 2568 assert!(result.unwrap().is_none()); 2569 assert_eq!( 2570 vmm.vm_config 2571 .as_ref() 2572 .unwrap() 2573 .lock() 2574 .unwrap() 2575 .vdpa 2576 .clone() 2577 .unwrap() 2578 .len(), 2579 1 2580 ); 2581 assert_eq!( 2582 vmm.vm_config 2583 .as_ref() 2584 .unwrap() 2585 .lock() 2586 .unwrap() 2587 .vdpa 2588 .clone() 2589 .unwrap()[0], 2590 vdpa_config 2591 ); 2592 } 2593 2594 #[test] 2595 fn test_vmm_vm_cold_add_vsock() { 2596 let mut vmm = create_dummy_vmm(); 2597 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2598 2599 assert!(matches!( 2600 vmm.vm_add_vsock(vsock_config.clone()), 2601 Err(VmError::VmNotCreated) 2602 )); 2603 2604 let _ = vmm.vm_create(create_dummy_vm_config()); 2605 assert!(vmm 2606 .vm_config 2607 .as_ref() 2608 .unwrap() 2609 .lock() 2610 .unwrap() 2611 .vsock 2612 .is_none()); 2613 2614 let result = vmm.vm_add_vsock(vsock_config.clone()); 2615 assert!(result.is_ok()); 2616 assert!(result.unwrap().is_none()); 2617 assert_eq!( 2618 vmm.vm_config 2619 .as_ref() 2620 .unwrap() 2621 .lock() 2622 .unwrap() 2623 .vsock 2624 .clone() 2625 .unwrap(), 2626 vsock_config 2627 ); 2628 } 2629 } 2630