1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use std::collections::HashMap; 12 use std::fs::File; 13 use std::io; 14 use std::io::{stdout, Read, Write}; 15 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 16 use std::os::unix::net::UnixListener; 17 use std::os::unix::net::UnixStream; 18 use std::panic::AssertUnwindSafe; 19 use std::path::PathBuf; 20 use std::rc::Rc; 21 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 22 use std::sync::{Arc, Mutex}; 23 use std::time::Instant; 24 use std::{result, thread}; 25 26 use anyhow::anyhow; 27 #[cfg(feature = "dbus_api")] 28 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 29 use api::http::HttpApiHandle; 30 use console_devices::{pre_create_console_devices, ConsoleInfo}; 31 use landlock::LandlockError; 32 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 33 use memory_manager::MemoryManagerSnapshotData; 34 use pci::PciBdf; 35 use seccompiler::{apply_filter, SeccompAction}; 36 use serde::ser::{SerializeStruct, Serializer}; 37 use serde::{Deserialize, Serialize}; 38 use signal_hook::iterator::{Handle, Signals}; 39 use thiserror::Error; 40 use tracer::trace_scoped; 41 use vm_memory::bitmap::AtomicBitmap; 42 use vm_memory::{ReadVolatile, WriteVolatile}; 43 use vm_migration::{protocol::*, Migratable}; 44 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 45 use vmm_sys_util::eventfd::EventFd; 46 use vmm_sys_util::signal::unblock_signal; 47 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 48 49 use crate::api::{ 50 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 51 VmSendMigrationData, VmmPingResponse, 52 }; 53 use crate::config::{ 54 add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, 55 UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, 56 }; 57 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 58 use crate::coredump::GuestDebuggable; 59 use crate::landlock::Landlock; 60 use crate::memory_manager::MemoryManager; 61 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 62 use crate::migration::get_vm_snapshot; 63 use crate::migration::{recv_vm_config, recv_vm_state}; 64 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 65 use crate::vm::{Error as VmError, Vm, VmState}; 66 67 mod acpi; 68 pub mod api; 69 mod clone3; 70 pub mod config; 71 pub mod console_devices; 72 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 73 mod coredump; 74 pub mod cpu; 75 pub mod device_manager; 76 pub mod device_tree; 77 #[cfg(feature = "guest_debug")] 78 mod gdb; 79 #[cfg(feature = "igvm")] 80 mod igvm; 81 pub mod interrupt; 82 pub mod landlock; 83 pub mod memory_manager; 84 pub mod migration; 85 mod pci_segment; 86 pub mod seccomp_filters; 87 mod serial_manager; 88 mod sigwinch_listener; 89 pub mod vm; 90 pub mod vm_config; 91 92 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 93 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 94 95 /// Errors associated with VMM management 96 #[derive(Debug, Error)] 97 pub enum Error { 98 /// API request receive error 99 #[error("Error receiving API request: {0}")] 100 ApiRequestRecv(#[source] RecvError), 101 102 /// API response send error 103 #[error("Error sending API request: {0}")] 104 ApiResponseSend(#[source] SendError<ApiResponse>), 105 106 /// Cannot bind to the UNIX domain socket path 107 #[error("Error binding to UNIX domain socket: {0}")] 108 Bind(#[source] io::Error), 109 110 /// Cannot clone EventFd. 111 #[error("Error cloning EventFd: {0}")] 112 EventFdClone(#[source] io::Error), 113 114 /// Cannot create EventFd. 115 #[error("Error creating EventFd: {0}")] 116 EventFdCreate(#[source] io::Error), 117 118 /// Cannot read from EventFd. 119 #[error("Error reading from EventFd: {0}")] 120 EventFdRead(#[source] io::Error), 121 122 /// Cannot create epoll context. 123 #[error("Error creating epoll context: {0}")] 124 Epoll(#[source] io::Error), 125 126 /// Cannot create HTTP thread 127 #[error("Error spawning HTTP thread: {0}")] 128 HttpThreadSpawn(#[source] io::Error), 129 130 /// Cannot create D-Bus thread 131 #[cfg(feature = "dbus_api")] 132 #[error("Error spawning D-Bus thread: {0}")] 133 DBusThreadSpawn(#[source] io::Error), 134 135 /// Cannot start D-Bus session 136 #[cfg(feature = "dbus_api")] 137 #[error("Error starting D-Bus session: {0}")] 138 CreateDBusSession(#[source] zbus::Error), 139 140 /// Cannot create `event-monitor` thread 141 #[error("Error spawning `event-monitor` thread: {0}")] 142 EventMonitorThreadSpawn(#[source] io::Error), 143 144 /// Cannot handle the VM STDIN stream 145 #[error("Error handling VM stdin: {0:?}")] 146 Stdin(VmError), 147 148 /// Cannot handle the VM pty stream 149 #[error("Error handling VM pty: {0:?}")] 150 Pty(VmError), 151 152 /// Cannot reboot the VM 153 #[error("Error rebooting VM: {0:?}")] 154 VmReboot(VmError), 155 156 /// Cannot create VMM thread 157 #[error("Error spawning VMM thread {0:?}")] 158 VmmThreadSpawn(#[source] io::Error), 159 160 /// Cannot shut the VMM down 161 #[error("Error shutting down VMM: {0:?}")] 162 VmmShutdown(VmError), 163 164 /// Cannot create seccomp filter 165 #[error("Error creating seccomp filter: {0}")] 166 CreateSeccompFilter(seccompiler::Error), 167 168 /// Cannot apply seccomp filter 169 #[error("Error applying seccomp filter: {0}")] 170 ApplySeccompFilter(seccompiler::Error), 171 172 /// Error activating virtio devices 173 #[error("Error activating virtio devices: {0:?}")] 174 ActivateVirtioDevices(VmError), 175 176 /// Error creating API server 177 #[error("Error creating API server {0:?}")] 178 CreateApiServer(micro_http::ServerError), 179 180 /// Error binding API server socket 181 #[error("Error creation API server's socket {0:?}")] 182 CreateApiServerSocket(#[source] io::Error), 183 184 #[cfg(feature = "guest_debug")] 185 #[error("Failed to start the GDB thread: {0}")] 186 GdbThreadSpawn(io::Error), 187 188 /// GDB request receive error 189 #[cfg(feature = "guest_debug")] 190 #[error("Error receiving GDB request: {0}")] 191 GdbRequestRecv(#[source] RecvError), 192 193 /// GDB response send error 194 #[cfg(feature = "guest_debug")] 195 #[error("Error sending GDB request: {0}")] 196 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 197 198 #[error("Cannot spawn a signal handler thread: {0}")] 199 SignalHandlerSpawn(#[source] io::Error), 200 201 #[error("Failed to join on threads: {0:?}")] 202 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 203 204 /// Cannot create Landlock object 205 #[error("Error creating landlock object: {0}")] 206 CreateLandlock(LandlockError), 207 208 /// Cannot apply landlock based sandboxing 209 #[error("Error applying landlock: {0}")] 210 ApplyLandlock(LandlockError), 211 } 212 pub type Result<T> = result::Result<T, Error>; 213 214 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 215 #[repr(u64)] 216 pub enum EpollDispatch { 217 Exit = 0, 218 Reset = 1, 219 Api = 2, 220 ActivateVirtioDevices = 3, 221 Debug = 4, 222 Unknown, 223 } 224 225 impl From<u64> for EpollDispatch { 226 fn from(v: u64) -> Self { 227 use EpollDispatch::*; 228 match v { 229 0 => Exit, 230 1 => Reset, 231 2 => Api, 232 3 => ActivateVirtioDevices, 233 4 => Debug, 234 _ => Unknown, 235 } 236 } 237 } 238 239 pub struct EpollContext { 240 epoll_file: File, 241 } 242 243 impl EpollContext { 244 pub fn new() -> result::Result<EpollContext, io::Error> { 245 let epoll_fd = epoll::create(true)?; 246 // Use 'File' to enforce closing on 'epoll_fd' 247 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 248 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 249 250 Ok(EpollContext { epoll_file }) 251 } 252 253 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 254 where 255 T: AsRawFd, 256 { 257 let dispatch_index = token as u64; 258 epoll::ctl( 259 self.epoll_file.as_raw_fd(), 260 epoll::ControlOptions::EPOLL_CTL_ADD, 261 fd.as_raw_fd(), 262 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 263 )?; 264 265 Ok(()) 266 } 267 268 #[cfg(fuzzing)] 269 pub fn add_event_custom<T>( 270 &mut self, 271 fd: &T, 272 id: u64, 273 evts: epoll::Events, 274 ) -> result::Result<(), io::Error> 275 where 276 T: AsRawFd, 277 { 278 epoll::ctl( 279 self.epoll_file.as_raw_fd(), 280 epoll::ControlOptions::EPOLL_CTL_ADD, 281 fd.as_raw_fd(), 282 epoll::Event::new(evts, id), 283 )?; 284 285 Ok(()) 286 } 287 } 288 289 impl AsRawFd for EpollContext { 290 fn as_raw_fd(&self) -> RawFd { 291 self.epoll_file.as_raw_fd() 292 } 293 } 294 295 pub struct PciDeviceInfo { 296 pub id: String, 297 pub bdf: PciBdf, 298 } 299 300 impl Serialize for PciDeviceInfo { 301 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 302 where 303 S: Serializer, 304 { 305 let bdf_str = self.bdf.to_string(); 306 307 // Serialize the structure. 308 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 309 state.serialize_field("id", &self.id)?; 310 state.serialize_field("bdf", &bdf_str)?; 311 state.end() 312 } 313 } 314 315 pub fn feature_list() -> Vec<String> { 316 vec![ 317 #[cfg(feature = "dbus_api")] 318 "dbus_api".to_string(), 319 #[cfg(feature = "dhat-heap")] 320 "dhat-heap".to_string(), 321 #[cfg(feature = "guest_debug")] 322 "guest_debug".to_string(), 323 #[cfg(feature = "igvm")] 324 "igvm".to_string(), 325 #[cfg(feature = "io_uring")] 326 "io_uring".to_string(), 327 #[cfg(feature = "kvm")] 328 "kvm".to_string(), 329 #[cfg(feature = "mshv")] 330 "mshv".to_string(), 331 #[cfg(feature = "sev_snp")] 332 "sev_snp".to_string(), 333 #[cfg(feature = "tdx")] 334 "tdx".to_string(), 335 #[cfg(feature = "tracing")] 336 "tracing".to_string(), 337 ] 338 } 339 340 pub fn start_event_monitor_thread( 341 mut monitor: event_monitor::Monitor, 342 seccomp_action: &SeccompAction, 343 landlock_enable: bool, 344 hypervisor_type: hypervisor::HypervisorType, 345 exit_event: EventFd, 346 ) -> Result<thread::JoinHandle<Result<()>>> { 347 // Retrieve seccomp filter 348 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 349 .map_err(Error::CreateSeccompFilter)?; 350 351 thread::Builder::new() 352 .name("event-monitor".to_owned()) 353 .spawn(move || { 354 // Apply seccomp filter 355 if !seccomp_filter.is_empty() { 356 apply_filter(&seccomp_filter) 357 .map_err(Error::ApplySeccompFilter) 358 .map_err(|e| { 359 error!("Error applying seccomp filter: {:?}", e); 360 exit_event.write(1).ok(); 361 e 362 })?; 363 } 364 if landlock_enable { 365 Landlock::new() 366 .map_err(Error::CreateLandlock)? 367 .restrict_self() 368 .map_err(Error::ApplyLandlock) 369 .map_err(|e| { 370 error!("Error applying landlock to event monitor thread: {:?}", e); 371 exit_event.write(1).ok(); 372 e 373 })?; 374 } 375 376 std::panic::catch_unwind(AssertUnwindSafe(move || { 377 while let Ok(event) = monitor.rx.recv() { 378 let event = Arc::new(event); 379 380 if let Some(ref mut file) = monitor.file { 381 file.write_all(event.as_bytes().as_ref()).ok(); 382 file.write_all(b"\n\n").ok(); 383 } 384 385 for tx in monitor.broadcast.iter() { 386 tx.send(event.clone()).ok(); 387 } 388 } 389 })) 390 .map_err(|_| { 391 error!("`event-monitor` thread panicked"); 392 exit_event.write(1).ok(); 393 }) 394 .ok(); 395 396 Ok(()) 397 }) 398 .map_err(Error::EventMonitorThreadSpawn) 399 } 400 401 #[allow(unused_variables)] 402 #[allow(clippy::too_many_arguments)] 403 pub fn start_vmm_thread( 404 vmm_version: VmmVersionInfo, 405 http_path: &Option<String>, 406 http_fd: Option<RawFd>, 407 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 408 api_event: EventFd, 409 api_sender: Sender<ApiRequest>, 410 api_receiver: Receiver<ApiRequest>, 411 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 412 #[cfg(feature = "guest_debug")] debug_event: EventFd, 413 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 414 exit_event: EventFd, 415 seccomp_action: &SeccompAction, 416 hypervisor: Arc<dyn hypervisor::Hypervisor>, 417 landlock_enable: bool, 418 ) -> Result<VmmThreadHandle> { 419 #[cfg(feature = "guest_debug")] 420 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 421 #[cfg(feature = "guest_debug")] 422 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 423 #[cfg(feature = "guest_debug")] 424 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 425 #[cfg(feature = "guest_debug")] 426 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 427 428 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 429 let hypervisor_type = hypervisor.hypervisor_type(); 430 431 // Retrieve seccomp filter 432 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 433 .map_err(Error::CreateSeccompFilter)?; 434 435 let vmm_seccomp_action = seccomp_action.clone(); 436 let thread = { 437 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 438 thread::Builder::new() 439 .name("vmm".to_string()) 440 .spawn(move || { 441 // Apply seccomp filter for VMM thread. 442 if !vmm_seccomp_filter.is_empty() { 443 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 444 } 445 446 let mut vmm = Vmm::new( 447 vmm_version, 448 api_event, 449 #[cfg(feature = "guest_debug")] 450 debug_event, 451 #[cfg(feature = "guest_debug")] 452 vm_debug_event, 453 vmm_seccomp_action, 454 hypervisor, 455 exit_event, 456 )?; 457 458 vmm.setup_signal_handler(landlock_enable)?; 459 460 vmm.control_loop( 461 Rc::new(api_receiver), 462 #[cfg(feature = "guest_debug")] 463 Rc::new(gdb_receiver), 464 ) 465 }) 466 .map_err(Error::VmmThreadSpawn)? 467 }; 468 469 // The VMM thread is started, we can start the dbus thread 470 // and start serving HTTP requests 471 #[cfg(feature = "dbus_api")] 472 let dbus_shutdown_chs = match dbus_options { 473 Some(opts) => { 474 let (_, chs) = api::start_dbus_thread( 475 opts, 476 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 477 api_sender.clone(), 478 seccomp_action, 479 exit_event.try_clone().map_err(Error::EventFdClone)?, 480 hypervisor_type, 481 )?; 482 Some(chs) 483 } 484 None => None, 485 }; 486 487 let http_api_handle = if let Some(http_path) = http_path { 488 Some(api::start_http_path_thread( 489 http_path, 490 api_event_clone, 491 api_sender, 492 seccomp_action, 493 exit_event, 494 hypervisor_type, 495 landlock_enable, 496 )?) 497 } else if let Some(http_fd) = http_fd { 498 Some(api::start_http_fd_thread( 499 http_fd, 500 api_event_clone, 501 api_sender, 502 seccomp_action, 503 exit_event, 504 hypervisor_type, 505 landlock_enable, 506 )?) 507 } else { 508 None 509 }; 510 511 #[cfg(feature = "guest_debug")] 512 if let Some(debug_path) = debug_path { 513 let target = gdb::GdbStub::new( 514 gdb_sender, 515 gdb_debug_event, 516 gdb_vm_debug_event, 517 gdb_hw_breakpoints, 518 ); 519 thread::Builder::new() 520 .name("gdb".to_owned()) 521 .spawn(move || gdb::gdb_thread(target, &debug_path)) 522 .map_err(Error::GdbThreadSpawn)?; 523 } 524 525 Ok(VmmThreadHandle { 526 thread_handle: thread, 527 #[cfg(feature = "dbus_api")] 528 dbus_shutdown_chs, 529 http_api_handle, 530 }) 531 } 532 533 #[derive(Clone, Deserialize, Serialize)] 534 struct VmMigrationConfig { 535 vm_config: Arc<Mutex<VmConfig>>, 536 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 537 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 538 memory_manager_data: MemoryManagerSnapshotData, 539 } 540 541 #[derive(Debug, Clone)] 542 pub struct VmmVersionInfo { 543 pub build_version: String, 544 pub version: String, 545 } 546 547 impl VmmVersionInfo { 548 pub fn new(build_version: &str, version: &str) -> Self { 549 Self { 550 build_version: build_version.to_owned(), 551 version: version.to_owned(), 552 } 553 } 554 } 555 556 pub struct VmmThreadHandle { 557 pub thread_handle: thread::JoinHandle<Result<()>>, 558 #[cfg(feature = "dbus_api")] 559 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 560 pub http_api_handle: Option<HttpApiHandle>, 561 } 562 563 pub struct Vmm { 564 epoll: EpollContext, 565 exit_evt: EventFd, 566 reset_evt: EventFd, 567 api_evt: EventFd, 568 #[cfg(feature = "guest_debug")] 569 debug_evt: EventFd, 570 #[cfg(feature = "guest_debug")] 571 vm_debug_evt: EventFd, 572 version: VmmVersionInfo, 573 vm: Option<Vm>, 574 vm_config: Option<Arc<Mutex<VmConfig>>>, 575 seccomp_action: SeccompAction, 576 hypervisor: Arc<dyn hypervisor::Hypervisor>, 577 activate_evt: EventFd, 578 signals: Option<Handle>, 579 threads: Vec<thread::JoinHandle<()>>, 580 original_termios_opt: Arc<Mutex<Option<termios>>>, 581 console_resize_pipe: Option<Arc<File>>, 582 console_info: Option<ConsoleInfo>, 583 } 584 585 impl Vmm { 586 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 587 588 fn signal_handler( 589 mut signals: Signals, 590 original_termios_opt: Arc<Mutex<Option<termios>>>, 591 exit_evt: &EventFd, 592 ) { 593 for sig in &Self::HANDLED_SIGNALS { 594 unblock_signal(*sig).unwrap(); 595 } 596 597 for signal in signals.forever() { 598 match signal { 599 SIGTERM | SIGINT => { 600 if exit_evt.write(1).is_err() { 601 // Resetting the terminal is usually done as the VMM exits 602 if let Ok(lock) = original_termios_opt.lock() { 603 if let Some(termios) = *lock { 604 // SAFETY: FFI call 605 let _ = unsafe { 606 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 607 }; 608 } 609 } else { 610 warn!("Failed to lock original termios"); 611 } 612 613 std::process::exit(1); 614 } 615 } 616 _ => (), 617 } 618 } 619 } 620 621 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 622 let signals = Signals::new(Self::HANDLED_SIGNALS); 623 match signals { 624 Ok(signals) => { 625 self.signals = Some(signals.handle()); 626 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 627 let original_termios_opt = Arc::clone(&self.original_termios_opt); 628 629 let signal_handler_seccomp_filter = get_seccomp_filter( 630 &self.seccomp_action, 631 Thread::SignalHandler, 632 self.hypervisor.hypervisor_type(), 633 ) 634 .map_err(Error::CreateSeccompFilter)?; 635 self.threads.push( 636 thread::Builder::new() 637 .name("vmm_signal_handler".to_string()) 638 .spawn(move || { 639 if !signal_handler_seccomp_filter.is_empty() { 640 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 641 .map_err(Error::ApplySeccompFilter) 642 { 643 error!("Error applying seccomp filter: {:?}", e); 644 exit_evt.write(1).ok(); 645 return; 646 } 647 } 648 if landlock_enable{ 649 match Landlock::new() { 650 Ok(landlock) => { 651 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 652 error!("Error applying Landlock to signal handler thread: {:?}", e); 653 exit_evt.write(1).ok(); 654 }); 655 } 656 Err(e) => { 657 error!("Error creating Landlock object: {:?}", e); 658 exit_evt.write(1).ok(); 659 } 660 }; 661 } 662 663 std::panic::catch_unwind(AssertUnwindSafe(|| { 664 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 665 })) 666 .map_err(|_| { 667 error!("vmm signal_handler thread panicked"); 668 exit_evt.write(1).ok() 669 }) 670 .ok(); 671 }) 672 .map_err(Error::SignalHandlerSpawn)?, 673 ); 674 } 675 Err(e) => error!("Signal not found {}", e), 676 } 677 Ok(()) 678 } 679 680 #[allow(clippy::too_many_arguments)] 681 fn new( 682 vmm_version: VmmVersionInfo, 683 api_evt: EventFd, 684 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 685 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 686 seccomp_action: SeccompAction, 687 hypervisor: Arc<dyn hypervisor::Hypervisor>, 688 exit_evt: EventFd, 689 ) -> Result<Self> { 690 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 691 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 692 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 693 694 epoll 695 .add_event(&exit_evt, EpollDispatch::Exit) 696 .map_err(Error::Epoll)?; 697 698 epoll 699 .add_event(&reset_evt, EpollDispatch::Reset) 700 .map_err(Error::Epoll)?; 701 702 epoll 703 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 704 .map_err(Error::Epoll)?; 705 706 epoll 707 .add_event(&api_evt, EpollDispatch::Api) 708 .map_err(Error::Epoll)?; 709 710 #[cfg(feature = "guest_debug")] 711 epoll 712 .add_event(&debug_evt, EpollDispatch::Debug) 713 .map_err(Error::Epoll)?; 714 715 Ok(Vmm { 716 epoll, 717 exit_evt, 718 reset_evt, 719 api_evt, 720 #[cfg(feature = "guest_debug")] 721 debug_evt, 722 #[cfg(feature = "guest_debug")] 723 vm_debug_evt, 724 version: vmm_version, 725 vm: None, 726 vm_config: None, 727 seccomp_action, 728 hypervisor, 729 activate_evt, 730 signals: None, 731 threads: vec![], 732 original_termios_opt: Arc::new(Mutex::new(None)), 733 console_resize_pipe: None, 734 console_info: None, 735 }) 736 } 737 738 fn vm_receive_config<T>( 739 &mut self, 740 req: &Request, 741 socket: &mut T, 742 existing_memory_files: Option<HashMap<u32, File>>, 743 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 744 where 745 T: Read + Write, 746 { 747 // Read in config data along with memory manager data 748 let mut data: Vec<u8> = Vec::new(); 749 data.resize_with(req.length() as usize, Default::default); 750 socket 751 .read_exact(&mut data) 752 .map_err(MigratableError::MigrateSocket)?; 753 754 let vm_migration_config: VmMigrationConfig = 755 serde_json::from_slice(&data).map_err(|e| { 756 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 757 })?; 758 759 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 760 self.vm_check_cpuid_compatibility( 761 &vm_migration_config.vm_config, 762 &vm_migration_config.common_cpuid, 763 )?; 764 765 let config = vm_migration_config.vm_config.clone(); 766 self.vm_config = Some(vm_migration_config.vm_config); 767 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 768 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 769 })?); 770 771 if self 772 .vm_config 773 .as_ref() 774 .unwrap() 775 .lock() 776 .unwrap() 777 .landlock_enable 778 { 779 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 780 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 781 })?; 782 } 783 784 let vm = Vm::create_hypervisor_vm( 785 &self.hypervisor, 786 #[cfg(feature = "tdx")] 787 false, 788 #[cfg(feature = "sev_snp")] 789 false, 790 ) 791 .map_err(|e| { 792 MigratableError::MigrateReceive(anyhow!( 793 "Error creating hypervisor VM from snapshot: {:?}", 794 e 795 )) 796 })?; 797 798 let phys_bits = 799 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 800 801 let memory_manager = MemoryManager::new( 802 vm, 803 &config.lock().unwrap().memory.clone(), 804 None, 805 phys_bits, 806 #[cfg(feature = "tdx")] 807 false, 808 Some(&vm_migration_config.memory_manager_data), 809 existing_memory_files, 810 #[cfg(target_arch = "x86_64")] 811 None, 812 ) 813 .map_err(|e| { 814 MigratableError::MigrateReceive(anyhow!( 815 "Error creating MemoryManager from snapshot: {:?}", 816 e 817 )) 818 })?; 819 820 Response::ok().write_to(socket)?; 821 822 Ok(memory_manager) 823 } 824 825 fn vm_receive_state<T>( 826 &mut self, 827 req: &Request, 828 socket: &mut T, 829 mm: Arc<Mutex<MemoryManager>>, 830 ) -> std::result::Result<(), MigratableError> 831 where 832 T: Read + Write, 833 { 834 // Read in state data 835 let mut data: Vec<u8> = Vec::new(); 836 data.resize_with(req.length() as usize, Default::default); 837 socket 838 .read_exact(&mut data) 839 .map_err(MigratableError::MigrateSocket)?; 840 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 841 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 842 })?; 843 844 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 845 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 846 })?; 847 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 848 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 849 })?; 850 #[cfg(feature = "guest_debug")] 851 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 852 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 853 })?; 854 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 855 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 856 })?; 857 858 let timestamp = Instant::now(); 859 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 860 let mut vm = Vm::new_from_memory_manager( 861 self.vm_config.clone().unwrap(), 862 mm, 863 hypervisor_vm, 864 exit_evt, 865 reset_evt, 866 #[cfg(feature = "guest_debug")] 867 debug_evt, 868 &self.seccomp_action, 869 self.hypervisor.clone(), 870 activate_evt, 871 timestamp, 872 self.console_info.clone(), 873 self.console_resize_pipe.as_ref().map(Arc::clone), 874 Arc::clone(&self.original_termios_opt), 875 Some(snapshot), 876 ) 877 .map_err(|e| { 878 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 879 })?; 880 881 // Create VM 882 vm.restore().map_err(|e| { 883 Response::error().write_to(socket).ok(); 884 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 885 })?; 886 self.vm = Some(vm); 887 888 Response::ok().write_to(socket)?; 889 890 Ok(()) 891 } 892 893 fn vm_receive_memory<T>( 894 &mut self, 895 req: &Request, 896 socket: &mut T, 897 memory_manager: &mut MemoryManager, 898 ) -> std::result::Result<(), MigratableError> 899 where 900 T: Read + ReadVolatile + Write, 901 { 902 // Read table 903 let table = MemoryRangeTable::read_from(socket, req.length())?; 904 905 // And then read the memory itself 906 memory_manager 907 .receive_memory_regions(&table, socket) 908 .inspect_err(|_| { 909 Response::error().write_to(socket).ok(); 910 })?; 911 Response::ok().write_to(socket)?; 912 Ok(()) 913 } 914 915 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 916 url.strip_prefix("unix:") 917 .ok_or_else(|| { 918 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 919 }) 920 .map(|s| s.into()) 921 } 922 923 // Returns true if there were dirty pages to send 924 fn vm_maybe_send_dirty_pages<T>( 925 vm: &mut Vm, 926 socket: &mut T, 927 ) -> result::Result<bool, MigratableError> 928 where 929 T: Read + Write + WriteVolatile, 930 { 931 // Send (dirty) memory table 932 let table = vm.dirty_log()?; 933 934 // But if there are no regions go straight to pause 935 if table.regions().is_empty() { 936 return Ok(false); 937 } 938 939 Request::memory(table.length()).write_to(socket).unwrap(); 940 table.write_to(socket)?; 941 // And then the memory itself 942 vm.send_memory_regions(&table, socket)?; 943 Response::read_from(socket)?.ok_or_abandon( 944 socket, 945 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 946 )?; 947 948 Ok(true) 949 } 950 951 fn send_migration( 952 vm: &mut Vm, 953 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 954 dyn hypervisor::Hypervisor, 955 >, 956 send_data_migration: VmSendMigrationData, 957 ) -> result::Result<(), MigratableError> { 958 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 959 let mut socket = UnixStream::connect(path).map_err(|e| { 960 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 961 })?; 962 963 // Start the migration 964 Request::start().write_to(&mut socket)?; 965 Response::read_from(&mut socket)?.ok_or_abandon( 966 &mut socket, 967 MigratableError::MigrateSend(anyhow!("Error starting migration")), 968 )?; 969 970 // Send config 971 let vm_config = vm.get_config(); 972 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 973 let common_cpuid = { 974 #[cfg(feature = "tdx")] 975 if vm_config.lock().unwrap().is_tdx_enabled() { 976 return Err(MigratableError::MigrateSend(anyhow!( 977 "Live Migration is not supported when TDX is enabled" 978 ))); 979 }; 980 981 let amx = vm_config.lock().unwrap().cpus.features.amx; 982 let phys_bits = 983 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 984 arch::generate_common_cpuid( 985 &hypervisor, 986 &arch::CpuidConfig { 987 sgx_epc_sections: None, 988 phys_bits, 989 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 990 #[cfg(feature = "tdx")] 991 tdx: false, 992 amx, 993 }, 994 ) 995 .map_err(|e| { 996 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 997 })? 998 }; 999 1000 if send_data_migration.local { 1001 vm.send_memory_fds(&mut socket)?; 1002 } 1003 1004 let vm_migration_config = VmMigrationConfig { 1005 vm_config, 1006 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1007 common_cpuid, 1008 memory_manager_data: vm.memory_manager_data(), 1009 }; 1010 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1011 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1012 socket 1013 .write_all(&config_data) 1014 .map_err(MigratableError::MigrateSocket)?; 1015 Response::read_from(&mut socket)?.ok_or_abandon( 1016 &mut socket, 1017 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1018 )?; 1019 1020 // Let every Migratable object know about the migration being started. 1021 vm.start_migration()?; 1022 1023 if send_data_migration.local { 1024 // Now pause VM 1025 vm.pause()?; 1026 } else { 1027 // Start logging dirty pages 1028 vm.start_dirty_log()?; 1029 1030 // Send memory table 1031 let table = vm.memory_range_table()?; 1032 Request::memory(table.length()) 1033 .write_to(&mut socket) 1034 .unwrap(); 1035 table.write_to(&mut socket)?; 1036 // And then the memory itself 1037 vm.send_memory_regions(&table, &mut socket)?; 1038 Response::read_from(&mut socket)?.ok_or_abandon( 1039 &mut socket, 1040 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1041 )?; 1042 1043 // Try at most 5 passes of dirty memory sending 1044 const MAX_DIRTY_MIGRATIONS: usize = 5; 1045 for i in 0..MAX_DIRTY_MIGRATIONS { 1046 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1047 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1048 break; 1049 } 1050 } 1051 1052 // Now pause VM 1053 vm.pause()?; 1054 1055 // Send last batch of dirty pages 1056 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1057 1058 // Stop logging dirty pages 1059 vm.stop_dirty_log()?; 1060 } 1061 // Capture snapshot and send it 1062 let vm_snapshot = vm.snapshot()?; 1063 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1064 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1065 socket 1066 .write_all(&snapshot_data) 1067 .map_err(MigratableError::MigrateSocket)?; 1068 Response::read_from(&mut socket)?.ok_or_abandon( 1069 &mut socket, 1070 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1071 )?; 1072 // Complete the migration 1073 Request::complete().write_to(&mut socket)?; 1074 Response::read_from(&mut socket)?.ok_or_abandon( 1075 &mut socket, 1076 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1077 )?; 1078 1079 info!("Migration complete"); 1080 1081 // Let every Migratable object know about the migration being complete 1082 vm.complete_migration() 1083 } 1084 1085 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1086 fn vm_check_cpuid_compatibility( 1087 &self, 1088 src_vm_config: &Arc<Mutex<VmConfig>>, 1089 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1090 ) -> result::Result<(), MigratableError> { 1091 #[cfg(feature = "tdx")] 1092 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1093 return Err(MigratableError::MigrateReceive(anyhow!( 1094 "Live Migration is not supported when TDX is enabled" 1095 ))); 1096 }; 1097 1098 // We check the `CPUID` compatibility of between the source vm and destination, which is 1099 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1100 let dest_cpuid = &{ 1101 let vm_config = &src_vm_config.lock().unwrap(); 1102 1103 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1104 arch::generate_common_cpuid( 1105 &self.hypervisor.clone(), 1106 &arch::CpuidConfig { 1107 sgx_epc_sections: None, 1108 phys_bits, 1109 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1110 #[cfg(feature = "tdx")] 1111 tdx: false, 1112 amx: vm_config.cpus.features.amx, 1113 }, 1114 ) 1115 .map_err(|e| { 1116 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1117 })? 1118 }; 1119 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1120 MigratableError::MigrateReceive(anyhow!( 1121 "Error checking cpu feature compatibility': {:?}", 1122 e 1123 )) 1124 }) 1125 } 1126 1127 fn control_loop( 1128 &mut self, 1129 api_receiver: Rc<Receiver<ApiRequest>>, 1130 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1131 ) -> Result<()> { 1132 const EPOLL_EVENTS_LEN: usize = 100; 1133 1134 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1135 let epoll_fd = self.epoll.as_raw_fd(); 1136 1137 'outer: loop { 1138 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1139 Ok(res) => res, 1140 Err(e) => { 1141 if e.kind() == io::ErrorKind::Interrupted { 1142 // It's well defined from the epoll_wait() syscall 1143 // documentation that the epoll loop can be interrupted 1144 // before any of the requested events occurred or the 1145 // timeout expired. In both those cases, epoll_wait() 1146 // returns an error of type EINTR, but this should not 1147 // be considered as a regular error. Instead it is more 1148 // appropriate to retry, by calling into epoll_wait(). 1149 continue; 1150 } 1151 return Err(Error::Epoll(e)); 1152 } 1153 }; 1154 1155 for event in events.iter().take(num_events) { 1156 let dispatch_event: EpollDispatch = event.data.into(); 1157 match dispatch_event { 1158 EpollDispatch::Unknown => { 1159 let event = event.data; 1160 warn!("Unknown VMM loop event: {}", event); 1161 } 1162 EpollDispatch::Exit => { 1163 info!("VM exit event"); 1164 // Consume the event. 1165 self.exit_evt.read().map_err(Error::EventFdRead)?; 1166 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1167 1168 break 'outer; 1169 } 1170 EpollDispatch::Reset => { 1171 info!("VM reset event"); 1172 // Consume the event. 1173 self.reset_evt.read().map_err(Error::EventFdRead)?; 1174 self.vm_reboot().map_err(Error::VmReboot)?; 1175 } 1176 EpollDispatch::ActivateVirtioDevices => { 1177 if let Some(ref vm) = self.vm { 1178 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1179 info!( 1180 "Trying to activate pending virtio devices: count = {}", 1181 count 1182 ); 1183 vm.activate_virtio_devices() 1184 .map_err(Error::ActivateVirtioDevices)?; 1185 } 1186 } 1187 EpollDispatch::Api => { 1188 // Consume the events. 1189 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1190 // Read from the API receiver channel 1191 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1192 1193 if api_request(self)? { 1194 break 'outer; 1195 } 1196 } 1197 } 1198 #[cfg(feature = "guest_debug")] 1199 EpollDispatch::Debug => { 1200 // Consume the events. 1201 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1202 // Read from the API receiver channel 1203 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1204 1205 let response = if let Some(ref mut vm) = self.vm { 1206 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1207 } else { 1208 Err(VmError::VmNotRunning) 1209 } 1210 .map_err(gdb::Error::Vm); 1211 1212 gdb_request 1213 .sender 1214 .send(response) 1215 .map_err(Error::GdbResponseSend)?; 1216 } 1217 } 1218 #[cfg(not(feature = "guest_debug"))] 1219 EpollDispatch::Debug => {} 1220 } 1221 } 1222 } 1223 1224 // Trigger the termination of the signal_handler thread 1225 if let Some(signals) = self.signals.take() { 1226 signals.close(); 1227 } 1228 1229 // Wait for all the threads to finish 1230 for thread in self.threads.drain(..) { 1231 thread.join().map_err(Error::ThreadCleanup)? 1232 } 1233 1234 Ok(()) 1235 } 1236 } 1237 1238 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1239 vm_config.lock().unwrap().apply_landlock()?; 1240 Ok(()) 1241 } 1242 1243 impl RequestHandler for Vmm { 1244 fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> { 1245 // We only store the passed VM config. 1246 // The VM will be created when being asked to boot it. 1247 if self.vm_config.is_none() { 1248 self.vm_config = Some(Arc::new(Mutex::new(*config))); 1249 self.console_info = 1250 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1251 1252 if self 1253 .vm_config 1254 .as_ref() 1255 .unwrap() 1256 .lock() 1257 .unwrap() 1258 .landlock_enable 1259 { 1260 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1261 .map_err(VmError::ApplyLandlock)?; 1262 } 1263 Ok(()) 1264 } else { 1265 Err(VmError::VmAlreadyCreated) 1266 } 1267 } 1268 1269 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1270 tracer::start(); 1271 info!("Booting VM"); 1272 event!("vm", "booting"); 1273 let r = { 1274 trace_scoped!("vm_boot"); 1275 // If we don't have a config, we cannot boot a VM. 1276 if self.vm_config.is_none() { 1277 return Err(VmError::VmMissingConfig); 1278 }; 1279 1280 // console_info is set to None in vm_shutdown. re-populate here if empty 1281 if self.console_info.is_none() { 1282 self.console_info = 1283 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1284 } 1285 1286 // Create a new VM if we don't have one yet. 1287 if self.vm.is_none() { 1288 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1289 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1290 #[cfg(feature = "guest_debug")] 1291 let vm_debug_evt = self 1292 .vm_debug_evt 1293 .try_clone() 1294 .map_err(VmError::EventFdClone)?; 1295 let activate_evt = self 1296 .activate_evt 1297 .try_clone() 1298 .map_err(VmError::EventFdClone)?; 1299 1300 if let Some(ref vm_config) = self.vm_config { 1301 let vm = Vm::new( 1302 Arc::clone(vm_config), 1303 exit_evt, 1304 reset_evt, 1305 #[cfg(feature = "guest_debug")] 1306 vm_debug_evt, 1307 &self.seccomp_action, 1308 self.hypervisor.clone(), 1309 activate_evt, 1310 self.console_info.clone(), 1311 self.console_resize_pipe.as_ref().map(Arc::clone), 1312 Arc::clone(&self.original_termios_opt), 1313 None, 1314 None, 1315 None, 1316 )?; 1317 1318 self.vm = Some(vm); 1319 } 1320 } 1321 1322 // Now we can boot the VM. 1323 if let Some(ref mut vm) = self.vm { 1324 vm.boot() 1325 } else { 1326 Err(VmError::VmNotCreated) 1327 } 1328 }; 1329 tracer::end(); 1330 if r.is_ok() { 1331 event!("vm", "booted"); 1332 } 1333 r 1334 } 1335 1336 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1337 if let Some(ref mut vm) = self.vm { 1338 vm.pause().map_err(VmError::Pause) 1339 } else { 1340 Err(VmError::VmNotRunning) 1341 } 1342 } 1343 1344 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1345 if let Some(ref mut vm) = self.vm { 1346 vm.resume().map_err(VmError::Resume) 1347 } else { 1348 Err(VmError::VmNotRunning) 1349 } 1350 } 1351 1352 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1353 if let Some(ref mut vm) = self.vm { 1354 // Drain console_info so that FDs are not reused 1355 let _ = self.console_info.take(); 1356 vm.snapshot() 1357 .map_err(VmError::Snapshot) 1358 .and_then(|snapshot| { 1359 vm.send(&snapshot, destination_url) 1360 .map_err(VmError::SnapshotSend) 1361 }) 1362 } else { 1363 Err(VmError::VmNotRunning) 1364 } 1365 } 1366 1367 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1368 if self.vm.is_some() || self.vm_config.is_some() { 1369 return Err(VmError::VmAlreadyCreated); 1370 } 1371 1372 let source_url = restore_cfg.source_url.as_path().to_str(); 1373 if source_url.is_none() { 1374 return Err(VmError::InvalidRestoreSourceUrl); 1375 } 1376 // Safe to unwrap as we checked it was Some(&str). 1377 let source_url = source_url.unwrap(); 1378 1379 let vm_config = Arc::new(Mutex::new( 1380 recv_vm_config(source_url).map_err(VmError::Restore)?, 1381 )); 1382 restore_cfg 1383 .validate(&vm_config.lock().unwrap().clone()) 1384 .map_err(VmError::ConfigValidation)?; 1385 1386 // Update VM's net configurations with new fds received for restore operation 1387 if let (Some(restored_nets), Some(vm_net_configs)) = 1388 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1389 { 1390 for net in restored_nets.iter() { 1391 for net_config in vm_net_configs.iter_mut() { 1392 // update only if the net dev is backed by FDs 1393 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1394 net_config.fds.clone_from(&net.fds); 1395 } 1396 } 1397 } 1398 } 1399 1400 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1401 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1402 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1403 1404 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1405 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1406 .map_err(VmError::Restore)?; 1407 1408 self.vm_config = Some(Arc::clone(&vm_config)); 1409 1410 // console_info is set to None in vm_snapshot. re-populate here if empty 1411 if self.console_info.is_none() { 1412 self.console_info = 1413 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1414 } 1415 1416 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1417 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1418 #[cfg(feature = "guest_debug")] 1419 let debug_evt = self 1420 .vm_debug_evt 1421 .try_clone() 1422 .map_err(VmError::EventFdClone)?; 1423 let activate_evt = self 1424 .activate_evt 1425 .try_clone() 1426 .map_err(VmError::EventFdClone)?; 1427 1428 let vm = Vm::new( 1429 vm_config, 1430 exit_evt, 1431 reset_evt, 1432 #[cfg(feature = "guest_debug")] 1433 debug_evt, 1434 &self.seccomp_action, 1435 self.hypervisor.clone(), 1436 activate_evt, 1437 self.console_info.clone(), 1438 self.console_resize_pipe.as_ref().map(Arc::clone), 1439 Arc::clone(&self.original_termios_opt), 1440 Some(snapshot), 1441 Some(source_url), 1442 Some(restore_cfg.prefault), 1443 )?; 1444 self.vm = Some(vm); 1445 1446 if self 1447 .vm_config 1448 .as_ref() 1449 .unwrap() 1450 .lock() 1451 .unwrap() 1452 .landlock_enable 1453 { 1454 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1455 .map_err(VmError::ApplyLandlock)?; 1456 } 1457 1458 // Now we can restore the rest of the VM. 1459 if let Some(ref mut vm) = self.vm { 1460 vm.restore() 1461 } else { 1462 Err(VmError::VmNotCreated) 1463 } 1464 } 1465 1466 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1467 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1468 if let Some(ref mut vm) = self.vm { 1469 vm.coredump(destination_url).map_err(VmError::Coredump) 1470 } else { 1471 Err(VmError::VmNotRunning) 1472 } 1473 } 1474 1475 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1476 let r = if let Some(ref mut vm) = self.vm.take() { 1477 // Drain console_info so that the FDs are not reused 1478 let _ = self.console_info.take(); 1479 vm.shutdown() 1480 } else { 1481 Err(VmError::VmNotRunning) 1482 }; 1483 1484 if r.is_ok() { 1485 event!("vm", "shutdown"); 1486 } 1487 1488 r 1489 } 1490 1491 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1492 event!("vm", "rebooting"); 1493 1494 // First we stop the current VM 1495 let config = if let Some(mut vm) = self.vm.take() { 1496 let config = vm.get_config(); 1497 vm.shutdown()?; 1498 config 1499 } else { 1500 return Err(VmError::VmNotCreated); 1501 }; 1502 1503 // vm.shutdown() closes all the console devices, so set console_info to None 1504 // so that the closed FD #s are not reused. 1505 let _ = self.console_info.take(); 1506 1507 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1508 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1509 #[cfg(feature = "guest_debug")] 1510 let debug_evt = self 1511 .vm_debug_evt 1512 .try_clone() 1513 .map_err(VmError::EventFdClone)?; 1514 let activate_evt = self 1515 .activate_evt 1516 .try_clone() 1517 .map_err(VmError::EventFdClone)?; 1518 1519 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1520 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1521 // during the boot process. 1522 if self.reset_evt.read().is_ok() { 1523 warn!("Spurious second reset event received. Ignoring."); 1524 } 1525 1526 self.console_info = 1527 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1528 1529 // Then we create the new VM 1530 let mut vm = Vm::new( 1531 config, 1532 exit_evt, 1533 reset_evt, 1534 #[cfg(feature = "guest_debug")] 1535 debug_evt, 1536 &self.seccomp_action, 1537 self.hypervisor.clone(), 1538 activate_evt, 1539 self.console_info.clone(), 1540 self.console_resize_pipe.as_ref().map(Arc::clone), 1541 Arc::clone(&self.original_termios_opt), 1542 None, 1543 None, 1544 None, 1545 )?; 1546 1547 // And we boot it 1548 vm.boot()?; 1549 1550 self.vm = Some(vm); 1551 1552 event!("vm", "rebooted"); 1553 1554 Ok(()) 1555 } 1556 1557 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1558 match &self.vm_config { 1559 Some(vm_config) => { 1560 let state = match &self.vm { 1561 Some(vm) => vm.get_state()?, 1562 None => VmState::Created, 1563 }; 1564 let config = vm_config.lock().unwrap().clone(); 1565 1566 let mut memory_actual_size = config.memory.total_size(); 1567 if let Some(vm) = &self.vm { 1568 memory_actual_size -= vm.balloon_size(); 1569 } 1570 1571 let device_tree = self 1572 .vm 1573 .as_ref() 1574 .map(|vm| vm.device_tree().lock().unwrap().clone()); 1575 1576 Ok(VmInfoResponse { 1577 config: Box::new(config), 1578 state, 1579 memory_actual_size, 1580 device_tree, 1581 }) 1582 } 1583 None => Err(VmError::VmNotCreated), 1584 } 1585 } 1586 1587 fn vmm_ping(&self) -> VmmPingResponse { 1588 let VmmVersionInfo { 1589 build_version, 1590 version, 1591 } = self.version.clone(); 1592 1593 VmmPingResponse { 1594 build_version, 1595 version, 1596 pid: std::process::id() as i64, 1597 features: feature_list(), 1598 } 1599 } 1600 1601 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1602 if self.vm_config.is_none() { 1603 return Ok(()); 1604 } 1605 1606 // If a VM is booted, we first try to shut it down. 1607 if self.vm.is_some() { 1608 self.vm_shutdown()?; 1609 } 1610 1611 self.vm_config = None; 1612 1613 event!("vm", "deleted"); 1614 1615 Ok(()) 1616 } 1617 1618 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1619 self.vm_delete()?; 1620 event!("vmm", "shutdown"); 1621 Ok(()) 1622 } 1623 1624 fn vm_resize( 1625 &mut self, 1626 desired_vcpus: Option<u8>, 1627 desired_ram: Option<u64>, 1628 desired_balloon: Option<u64>, 1629 ) -> result::Result<(), VmError> { 1630 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1631 1632 if let Some(ref mut vm) = self.vm { 1633 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1634 error!("Error when resizing VM: {:?}", e); 1635 Err(e) 1636 } else { 1637 Ok(()) 1638 } 1639 } else { 1640 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1641 if let Some(desired_vcpus) = desired_vcpus { 1642 config.cpus.boot_vcpus = desired_vcpus; 1643 } 1644 if let Some(desired_ram) = desired_ram { 1645 config.memory.size = desired_ram; 1646 } 1647 if let Some(desired_balloon) = desired_balloon { 1648 if let Some(balloon_config) = &mut config.balloon { 1649 balloon_config.size = desired_balloon; 1650 } 1651 } 1652 Ok(()) 1653 } 1654 } 1655 1656 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1657 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1658 1659 if let Some(ref mut vm) = self.vm { 1660 if let Err(e) = vm.resize_zone(id, desired_ram) { 1661 error!("Error when resizing VM: {:?}", e); 1662 Err(e) 1663 } else { 1664 Ok(()) 1665 } 1666 } else { 1667 // Update VmConfig by setting the new desired ram. 1668 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1669 1670 if let Some(zones) = &mut memory_config.zones { 1671 for zone in zones.iter_mut() { 1672 if zone.id == id { 1673 zone.size = desired_ram; 1674 return Ok(()); 1675 } 1676 } 1677 } 1678 1679 error!("Could not find the memory zone {} for the resize", id); 1680 Err(VmError::ResizeZone) 1681 } 1682 } 1683 1684 fn vm_add_device( 1685 &mut self, 1686 device_cfg: DeviceConfig, 1687 ) -> result::Result<Option<Vec<u8>>, VmError> { 1688 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1689 1690 { 1691 // Validate the configuration change in a cloned configuration 1692 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1693 add_to_config(&mut config.devices, device_cfg.clone()); 1694 config.validate().map_err(VmError::ConfigValidation)?; 1695 } 1696 1697 if let Some(ref mut vm) = self.vm { 1698 let info = vm.add_device(device_cfg).map_err(|e| { 1699 error!("Error when adding new device to the VM: {:?}", e); 1700 e 1701 })?; 1702 serde_json::to_vec(&info) 1703 .map(Some) 1704 .map_err(VmError::SerializeJson) 1705 } else { 1706 // Update VmConfig by adding the new device. 1707 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1708 add_to_config(&mut config.devices, device_cfg); 1709 Ok(None) 1710 } 1711 } 1712 1713 fn vm_add_user_device( 1714 &mut self, 1715 device_cfg: UserDeviceConfig, 1716 ) -> result::Result<Option<Vec<u8>>, VmError> { 1717 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1718 1719 { 1720 // Validate the configuration change in a cloned configuration 1721 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1722 add_to_config(&mut config.user_devices, device_cfg.clone()); 1723 config.validate().map_err(VmError::ConfigValidation)?; 1724 } 1725 1726 if let Some(ref mut vm) = self.vm { 1727 let info = vm.add_user_device(device_cfg).map_err(|e| { 1728 error!("Error when adding new user device to the VM: {:?}", e); 1729 e 1730 })?; 1731 serde_json::to_vec(&info) 1732 .map(Some) 1733 .map_err(VmError::SerializeJson) 1734 } else { 1735 // Update VmConfig by adding the new device. 1736 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1737 add_to_config(&mut config.user_devices, device_cfg); 1738 Ok(None) 1739 } 1740 } 1741 1742 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1743 if let Some(ref mut vm) = self.vm { 1744 if let Err(e) = vm.remove_device(id) { 1745 error!("Error when removing device from the VM: {:?}", e); 1746 Err(e) 1747 } else { 1748 Ok(()) 1749 } 1750 } else if let Some(ref config) = self.vm_config { 1751 let mut config = config.lock().unwrap(); 1752 if config.remove_device(&id) { 1753 Ok(()) 1754 } else { 1755 Err(VmError::NoDeviceToRemove(id)) 1756 } 1757 } else { 1758 Err(VmError::VmNotCreated) 1759 } 1760 } 1761 1762 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1763 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1764 1765 { 1766 // Validate the configuration change in a cloned configuration 1767 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1768 add_to_config(&mut config.disks, disk_cfg.clone()); 1769 config.validate().map_err(VmError::ConfigValidation)?; 1770 } 1771 1772 if let Some(ref mut vm) = self.vm { 1773 let info = vm.add_disk(disk_cfg).map_err(|e| { 1774 error!("Error when adding new disk to the VM: {:?}", e); 1775 e 1776 })?; 1777 serde_json::to_vec(&info) 1778 .map(Some) 1779 .map_err(VmError::SerializeJson) 1780 } else { 1781 // Update VmConfig by adding the new device. 1782 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1783 add_to_config(&mut config.disks, disk_cfg); 1784 Ok(None) 1785 } 1786 } 1787 1788 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1789 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1790 1791 { 1792 // Validate the configuration change in a cloned configuration 1793 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1794 add_to_config(&mut config.fs, fs_cfg.clone()); 1795 config.validate().map_err(VmError::ConfigValidation)?; 1796 } 1797 1798 if let Some(ref mut vm) = self.vm { 1799 let info = vm.add_fs(fs_cfg).map_err(|e| { 1800 error!("Error when adding new fs to the VM: {:?}", e); 1801 e 1802 })?; 1803 serde_json::to_vec(&info) 1804 .map(Some) 1805 .map_err(VmError::SerializeJson) 1806 } else { 1807 // Update VmConfig by adding the new device. 1808 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1809 add_to_config(&mut config.fs, fs_cfg); 1810 Ok(None) 1811 } 1812 } 1813 1814 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1815 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1816 1817 { 1818 // Validate the configuration change in a cloned configuration 1819 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1820 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1821 config.validate().map_err(VmError::ConfigValidation)?; 1822 } 1823 1824 if let Some(ref mut vm) = self.vm { 1825 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1826 error!("Error when adding new pmem device to the VM: {:?}", e); 1827 e 1828 })?; 1829 serde_json::to_vec(&info) 1830 .map(Some) 1831 .map_err(VmError::SerializeJson) 1832 } else { 1833 // Update VmConfig by adding the new device. 1834 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1835 add_to_config(&mut config.pmem, pmem_cfg); 1836 Ok(None) 1837 } 1838 } 1839 1840 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1841 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1842 1843 { 1844 // Validate the configuration change in a cloned configuration 1845 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1846 add_to_config(&mut config.net, net_cfg.clone()); 1847 config.validate().map_err(VmError::ConfigValidation)?; 1848 } 1849 1850 if let Some(ref mut vm) = self.vm { 1851 let info = vm.add_net(net_cfg).map_err(|e| { 1852 error!("Error when adding new network device to the VM: {:?}", e); 1853 e 1854 })?; 1855 serde_json::to_vec(&info) 1856 .map(Some) 1857 .map_err(VmError::SerializeJson) 1858 } else { 1859 // Update VmConfig by adding the new device. 1860 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1861 add_to_config(&mut config.net, net_cfg); 1862 Ok(None) 1863 } 1864 } 1865 1866 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1867 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1868 1869 { 1870 // Validate the configuration change in a cloned configuration 1871 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1872 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1873 config.validate().map_err(VmError::ConfigValidation)?; 1874 } 1875 1876 if let Some(ref mut vm) = self.vm { 1877 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1878 error!("Error when adding new vDPA device to the VM: {:?}", e); 1879 e 1880 })?; 1881 serde_json::to_vec(&info) 1882 .map(Some) 1883 .map_err(VmError::SerializeJson) 1884 } else { 1885 // Update VmConfig by adding the new device. 1886 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1887 add_to_config(&mut config.vdpa, vdpa_cfg); 1888 Ok(None) 1889 } 1890 } 1891 1892 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1893 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1894 1895 { 1896 // Validate the configuration change in a cloned configuration 1897 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1898 1899 if config.vsock.is_some() { 1900 return Err(VmError::TooManyVsockDevices); 1901 } 1902 1903 config.vsock = Some(vsock_cfg.clone()); 1904 config.validate().map_err(VmError::ConfigValidation)?; 1905 } 1906 1907 if let Some(ref mut vm) = self.vm { 1908 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1909 error!("Error when adding new vsock device to the VM: {:?}", e); 1910 e 1911 })?; 1912 serde_json::to_vec(&info) 1913 .map(Some) 1914 .map_err(VmError::SerializeJson) 1915 } else { 1916 // Update VmConfig by adding the new device. 1917 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1918 config.vsock = Some(vsock_cfg); 1919 Ok(None) 1920 } 1921 } 1922 1923 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1924 if let Some(ref mut vm) = self.vm { 1925 let info = vm.counters().map_err(|e| { 1926 error!("Error when getting counters from the VM: {:?}", e); 1927 e 1928 })?; 1929 serde_json::to_vec(&info) 1930 .map(Some) 1931 .map_err(VmError::SerializeJson) 1932 } else { 1933 Err(VmError::VmNotRunning) 1934 } 1935 } 1936 1937 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1938 if let Some(ref mut vm) = self.vm { 1939 vm.power_button() 1940 } else { 1941 Err(VmError::VmNotRunning) 1942 } 1943 } 1944 1945 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 1946 if let Some(ref mut vm) = self.vm { 1947 vm.nmi() 1948 } else { 1949 Err(VmError::VmNotRunning) 1950 } 1951 } 1952 1953 fn vm_receive_migration( 1954 &mut self, 1955 receive_data_migration: VmReceiveMigrationData, 1956 ) -> result::Result<(), MigratableError> { 1957 info!( 1958 "Receiving migration: receiver_url = {}", 1959 receive_data_migration.receiver_url 1960 ); 1961 1962 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1963 let listener = UnixListener::bind(&path).map_err(|e| { 1964 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1965 })?; 1966 let (mut socket, _addr) = listener.accept().map_err(|e| { 1967 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1968 })?; 1969 std::fs::remove_file(&path).map_err(|e| { 1970 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1971 })?; 1972 1973 let mut started = false; 1974 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1975 let mut existing_memory_files = None; 1976 loop { 1977 let req = Request::read_from(&mut socket)?; 1978 match req.command() { 1979 Command::Invalid => info!("Invalid Command Received"), 1980 Command::Start => { 1981 info!("Start Command Received"); 1982 started = true; 1983 1984 Response::ok().write_to(&mut socket)?; 1985 } 1986 Command::Config => { 1987 info!("Config Command Received"); 1988 1989 if !started { 1990 warn!("Migration not started yet"); 1991 Response::error().write_to(&mut socket)?; 1992 continue; 1993 } 1994 memory_manager = Some(self.vm_receive_config( 1995 &req, 1996 &mut socket, 1997 existing_memory_files.take(), 1998 )?); 1999 } 2000 Command::State => { 2001 info!("State Command Received"); 2002 2003 if !started { 2004 warn!("Migration not started yet"); 2005 Response::error().write_to(&mut socket)?; 2006 continue; 2007 } 2008 if let Some(mm) = memory_manager.take() { 2009 self.vm_receive_state(&req, &mut socket, mm)?; 2010 } else { 2011 warn!("Configuration not sent yet"); 2012 Response::error().write_to(&mut socket)?; 2013 } 2014 } 2015 Command::Memory => { 2016 info!("Memory Command Received"); 2017 2018 if !started { 2019 warn!("Migration not started yet"); 2020 Response::error().write_to(&mut socket)?; 2021 continue; 2022 } 2023 if let Some(mm) = memory_manager.as_ref() { 2024 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2025 } else { 2026 warn!("Configuration not sent yet"); 2027 Response::error().write_to(&mut socket)?; 2028 } 2029 } 2030 Command::MemoryFd => { 2031 info!("MemoryFd Command Received"); 2032 2033 if !started { 2034 warn!("Migration not started yet"); 2035 Response::error().write_to(&mut socket)?; 2036 continue; 2037 } 2038 2039 let mut buf = [0u8; 4]; 2040 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 2041 MigratableError::MigrateReceive(anyhow!( 2042 "Error receiving slot from socket: {}", 2043 e 2044 )) 2045 })?; 2046 2047 if existing_memory_files.is_none() { 2048 existing_memory_files = Some(HashMap::default()) 2049 } 2050 2051 if let Some(ref mut existing_memory_files) = existing_memory_files { 2052 let slot = u32::from_le_bytes(buf); 2053 existing_memory_files.insert(slot, file.unwrap()); 2054 } 2055 2056 Response::ok().write_to(&mut socket)?; 2057 } 2058 Command::Complete => { 2059 info!("Complete Command Received"); 2060 if let Some(ref mut vm) = self.vm.as_mut() { 2061 vm.resume()?; 2062 Response::ok().write_to(&mut socket)?; 2063 } else { 2064 warn!("VM not created yet"); 2065 Response::error().write_to(&mut socket)?; 2066 } 2067 break; 2068 } 2069 Command::Abandon => { 2070 info!("Abandon Command Received"); 2071 self.vm = None; 2072 self.vm_config = None; 2073 Response::ok().write_to(&mut socket).ok(); 2074 break; 2075 } 2076 } 2077 } 2078 2079 Ok(()) 2080 } 2081 2082 fn vm_send_migration( 2083 &mut self, 2084 send_data_migration: VmSendMigrationData, 2085 ) -> result::Result<(), MigratableError> { 2086 info!( 2087 "Sending migration: destination_url = {}, local = {}", 2088 send_data_migration.destination_url, send_data_migration.local 2089 ); 2090 2091 if !self 2092 .vm_config 2093 .as_ref() 2094 .unwrap() 2095 .lock() 2096 .unwrap() 2097 .backed_by_shared_memory() 2098 && send_data_migration.local 2099 { 2100 return Err(MigratableError::MigrateSend(anyhow!( 2101 "Local migration requires shared memory or hugepages enabled" 2102 ))); 2103 } 2104 2105 if let Some(vm) = self.vm.as_mut() { 2106 Self::send_migration( 2107 vm, 2108 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2109 self.hypervisor.clone(), 2110 send_data_migration, 2111 ) 2112 .map_err(|migration_err| { 2113 error!("Migration failed: {:?}", migration_err); 2114 2115 // Stop logging dirty pages 2116 if let Err(e) = vm.stop_dirty_log() { 2117 return e; 2118 } 2119 2120 if vm.get_state().unwrap() == VmState::Paused { 2121 if let Err(e) = vm.resume() { 2122 return e; 2123 } 2124 } 2125 2126 migration_err 2127 })?; 2128 2129 // Shutdown the VM after the migration succeeded 2130 self.exit_evt.write(1).map_err(|e| { 2131 MigratableError::MigrateSend(anyhow!( 2132 "Failed shutting down the VM after migration: {:?}", 2133 e 2134 )) 2135 }) 2136 } else { 2137 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2138 } 2139 } 2140 } 2141 2142 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2143 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2144 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2145 2146 #[cfg(test)] 2147 mod unit_tests { 2148 use config::{ 2149 ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig, 2150 RngConfig, 2151 }; 2152 2153 use super::*; 2154 #[cfg(target_arch = "x86_64")] 2155 use crate::config::DebugConsoleConfig; 2156 2157 fn create_dummy_vmm() -> Vmm { 2158 Vmm::new( 2159 VmmVersionInfo::new("dummy", "dummy"), 2160 EventFd::new(EFD_NONBLOCK).unwrap(), 2161 #[cfg(feature = "guest_debug")] 2162 EventFd::new(EFD_NONBLOCK).unwrap(), 2163 #[cfg(feature = "guest_debug")] 2164 EventFd::new(EFD_NONBLOCK).unwrap(), 2165 SeccompAction::Allow, 2166 hypervisor::new().unwrap(), 2167 EventFd::new(EFD_NONBLOCK).unwrap(), 2168 ) 2169 .unwrap() 2170 } 2171 2172 fn create_dummy_vm_config() -> Box<VmConfig> { 2173 Box::new(VmConfig { 2174 cpus: CpusConfig { 2175 boot_vcpus: 1, 2176 max_vcpus: 1, 2177 topology: None, 2178 kvm_hyperv: false, 2179 max_phys_bits: 46, 2180 affinity: None, 2181 features: config::CpuFeatures::default(), 2182 }, 2183 memory: MemoryConfig { 2184 size: 536_870_912, 2185 mergeable: false, 2186 hotplug_method: HotplugMethod::Acpi, 2187 hotplug_size: None, 2188 hotplugged_size: None, 2189 shared: true, 2190 hugepages: false, 2191 hugepage_size: None, 2192 prefault: false, 2193 zones: None, 2194 thp: true, 2195 }, 2196 payload: Some(PayloadConfig { 2197 kernel: Some(PathBuf::from("/path/to/kernel")), 2198 firmware: None, 2199 cmdline: None, 2200 initramfs: None, 2201 #[cfg(feature = "igvm")] 2202 igvm: None, 2203 #[cfg(feature = "sev_snp")] 2204 host_data: None, 2205 }), 2206 rate_limit_groups: None, 2207 disks: None, 2208 net: None, 2209 rng: RngConfig { 2210 src: PathBuf::from("/dev/urandom"), 2211 iommu: false, 2212 }, 2213 balloon: None, 2214 fs: None, 2215 pmem: None, 2216 serial: ConsoleConfig { 2217 file: None, 2218 mode: ConsoleOutputMode::Null, 2219 iommu: false, 2220 socket: None, 2221 }, 2222 console: ConsoleConfig { 2223 file: None, 2224 mode: ConsoleOutputMode::Tty, 2225 iommu: false, 2226 socket: None, 2227 }, 2228 #[cfg(target_arch = "x86_64")] 2229 debug_console: DebugConsoleConfig::default(), 2230 devices: None, 2231 user_devices: None, 2232 vdpa: None, 2233 vsock: None, 2234 #[cfg(feature = "pvmemcontrol")] 2235 pvmemcontrol: None, 2236 pvpanic: false, 2237 iommu: false, 2238 #[cfg(target_arch = "x86_64")] 2239 sgx_epc: None, 2240 numa: None, 2241 watchdog: false, 2242 #[cfg(feature = "guest_debug")] 2243 gdb: false, 2244 pci_segments: None, 2245 platform: None, 2246 tpm: None, 2247 preserved_fds: None, 2248 landlock_enable: false, 2249 landlock_rules: None, 2250 }) 2251 } 2252 2253 #[test] 2254 fn test_vmm_vm_create() { 2255 let mut vmm = create_dummy_vmm(); 2256 let config = create_dummy_vm_config(); 2257 2258 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2259 assert!(matches!( 2260 vmm.vm_create(config), 2261 Err(VmError::VmAlreadyCreated) 2262 )); 2263 } 2264 2265 #[test] 2266 fn test_vmm_vm_cold_add_device() { 2267 let mut vmm = create_dummy_vmm(); 2268 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2269 2270 assert!(matches!( 2271 vmm.vm_add_device(device_config.clone()), 2272 Err(VmError::VmNotCreated) 2273 )); 2274 2275 let _ = vmm.vm_create(create_dummy_vm_config()); 2276 assert!(vmm 2277 .vm_config 2278 .as_ref() 2279 .unwrap() 2280 .lock() 2281 .unwrap() 2282 .devices 2283 .is_none()); 2284 2285 let result = vmm.vm_add_device(device_config.clone()); 2286 assert!(result.is_ok()); 2287 assert!(result.unwrap().is_none()); 2288 assert_eq!( 2289 vmm.vm_config 2290 .as_ref() 2291 .unwrap() 2292 .lock() 2293 .unwrap() 2294 .devices 2295 .clone() 2296 .unwrap() 2297 .len(), 2298 1 2299 ); 2300 assert_eq!( 2301 vmm.vm_config 2302 .as_ref() 2303 .unwrap() 2304 .lock() 2305 .unwrap() 2306 .devices 2307 .clone() 2308 .unwrap()[0], 2309 device_config 2310 ); 2311 } 2312 2313 #[test] 2314 fn test_vmm_vm_cold_add_user_device() { 2315 let mut vmm = create_dummy_vmm(); 2316 let user_device_config = 2317 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2318 2319 assert!(matches!( 2320 vmm.vm_add_user_device(user_device_config.clone()), 2321 Err(VmError::VmNotCreated) 2322 )); 2323 2324 let _ = vmm.vm_create(create_dummy_vm_config()); 2325 assert!(vmm 2326 .vm_config 2327 .as_ref() 2328 .unwrap() 2329 .lock() 2330 .unwrap() 2331 .user_devices 2332 .is_none()); 2333 2334 let result = vmm.vm_add_user_device(user_device_config.clone()); 2335 assert!(result.is_ok()); 2336 assert!(result.unwrap().is_none()); 2337 assert_eq!( 2338 vmm.vm_config 2339 .as_ref() 2340 .unwrap() 2341 .lock() 2342 .unwrap() 2343 .user_devices 2344 .clone() 2345 .unwrap() 2346 .len(), 2347 1 2348 ); 2349 assert_eq!( 2350 vmm.vm_config 2351 .as_ref() 2352 .unwrap() 2353 .lock() 2354 .unwrap() 2355 .user_devices 2356 .clone() 2357 .unwrap()[0], 2358 user_device_config 2359 ); 2360 } 2361 2362 #[test] 2363 fn test_vmm_vm_cold_add_disk() { 2364 let mut vmm = create_dummy_vmm(); 2365 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2366 2367 assert!(matches!( 2368 vmm.vm_add_disk(disk_config.clone()), 2369 Err(VmError::VmNotCreated) 2370 )); 2371 2372 let _ = vmm.vm_create(create_dummy_vm_config()); 2373 assert!(vmm 2374 .vm_config 2375 .as_ref() 2376 .unwrap() 2377 .lock() 2378 .unwrap() 2379 .disks 2380 .is_none()); 2381 2382 let result = vmm.vm_add_disk(disk_config.clone()); 2383 assert!(result.is_ok()); 2384 assert!(result.unwrap().is_none()); 2385 assert_eq!( 2386 vmm.vm_config 2387 .as_ref() 2388 .unwrap() 2389 .lock() 2390 .unwrap() 2391 .disks 2392 .clone() 2393 .unwrap() 2394 .len(), 2395 1 2396 ); 2397 assert_eq!( 2398 vmm.vm_config 2399 .as_ref() 2400 .unwrap() 2401 .lock() 2402 .unwrap() 2403 .disks 2404 .clone() 2405 .unwrap()[0], 2406 disk_config 2407 ); 2408 } 2409 2410 #[test] 2411 fn test_vmm_vm_cold_add_fs() { 2412 let mut vmm = create_dummy_vmm(); 2413 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2414 2415 assert!(matches!( 2416 vmm.vm_add_fs(fs_config.clone()), 2417 Err(VmError::VmNotCreated) 2418 )); 2419 2420 let _ = vmm.vm_create(create_dummy_vm_config()); 2421 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2422 2423 let result = vmm.vm_add_fs(fs_config.clone()); 2424 assert!(result.is_ok()); 2425 assert!(result.unwrap().is_none()); 2426 assert_eq!( 2427 vmm.vm_config 2428 .as_ref() 2429 .unwrap() 2430 .lock() 2431 .unwrap() 2432 .fs 2433 .clone() 2434 .unwrap() 2435 .len(), 2436 1 2437 ); 2438 assert_eq!( 2439 vmm.vm_config 2440 .as_ref() 2441 .unwrap() 2442 .lock() 2443 .unwrap() 2444 .fs 2445 .clone() 2446 .unwrap()[0], 2447 fs_config 2448 ); 2449 } 2450 2451 #[test] 2452 fn test_vmm_vm_cold_add_pmem() { 2453 let mut vmm = create_dummy_vmm(); 2454 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2455 2456 assert!(matches!( 2457 vmm.vm_add_pmem(pmem_config.clone()), 2458 Err(VmError::VmNotCreated) 2459 )); 2460 2461 let _ = vmm.vm_create(create_dummy_vm_config()); 2462 assert!(vmm 2463 .vm_config 2464 .as_ref() 2465 .unwrap() 2466 .lock() 2467 .unwrap() 2468 .pmem 2469 .is_none()); 2470 2471 let result = vmm.vm_add_pmem(pmem_config.clone()); 2472 assert!(result.is_ok()); 2473 assert!(result.unwrap().is_none()); 2474 assert_eq!( 2475 vmm.vm_config 2476 .as_ref() 2477 .unwrap() 2478 .lock() 2479 .unwrap() 2480 .pmem 2481 .clone() 2482 .unwrap() 2483 .len(), 2484 1 2485 ); 2486 assert_eq!( 2487 vmm.vm_config 2488 .as_ref() 2489 .unwrap() 2490 .lock() 2491 .unwrap() 2492 .pmem 2493 .clone() 2494 .unwrap()[0], 2495 pmem_config 2496 ); 2497 } 2498 2499 #[test] 2500 fn test_vmm_vm_cold_add_net() { 2501 let mut vmm = create_dummy_vmm(); 2502 let net_config = NetConfig::parse( 2503 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2504 ) 2505 .unwrap(); 2506 2507 assert!(matches!( 2508 vmm.vm_add_net(net_config.clone()), 2509 Err(VmError::VmNotCreated) 2510 )); 2511 2512 let _ = vmm.vm_create(create_dummy_vm_config()); 2513 assert!(vmm 2514 .vm_config 2515 .as_ref() 2516 .unwrap() 2517 .lock() 2518 .unwrap() 2519 .net 2520 .is_none()); 2521 2522 let result = vmm.vm_add_net(net_config.clone()); 2523 assert!(result.is_ok()); 2524 assert!(result.unwrap().is_none()); 2525 assert_eq!( 2526 vmm.vm_config 2527 .as_ref() 2528 .unwrap() 2529 .lock() 2530 .unwrap() 2531 .net 2532 .clone() 2533 .unwrap() 2534 .len(), 2535 1 2536 ); 2537 assert_eq!( 2538 vmm.vm_config 2539 .as_ref() 2540 .unwrap() 2541 .lock() 2542 .unwrap() 2543 .net 2544 .clone() 2545 .unwrap()[0], 2546 net_config 2547 ); 2548 } 2549 2550 #[test] 2551 fn test_vmm_vm_cold_add_vdpa() { 2552 let mut vmm = create_dummy_vmm(); 2553 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2554 2555 assert!(matches!( 2556 vmm.vm_add_vdpa(vdpa_config.clone()), 2557 Err(VmError::VmNotCreated) 2558 )); 2559 2560 let _ = vmm.vm_create(create_dummy_vm_config()); 2561 assert!(vmm 2562 .vm_config 2563 .as_ref() 2564 .unwrap() 2565 .lock() 2566 .unwrap() 2567 .vdpa 2568 .is_none()); 2569 2570 let result = vmm.vm_add_vdpa(vdpa_config.clone()); 2571 assert!(result.is_ok()); 2572 assert!(result.unwrap().is_none()); 2573 assert_eq!( 2574 vmm.vm_config 2575 .as_ref() 2576 .unwrap() 2577 .lock() 2578 .unwrap() 2579 .vdpa 2580 .clone() 2581 .unwrap() 2582 .len(), 2583 1 2584 ); 2585 assert_eq!( 2586 vmm.vm_config 2587 .as_ref() 2588 .unwrap() 2589 .lock() 2590 .unwrap() 2591 .vdpa 2592 .clone() 2593 .unwrap()[0], 2594 vdpa_config 2595 ); 2596 } 2597 2598 #[test] 2599 fn test_vmm_vm_cold_add_vsock() { 2600 let mut vmm = create_dummy_vmm(); 2601 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2602 2603 assert!(matches!( 2604 vmm.vm_add_vsock(vsock_config.clone()), 2605 Err(VmError::VmNotCreated) 2606 )); 2607 2608 let _ = vmm.vm_create(create_dummy_vm_config()); 2609 assert!(vmm 2610 .vm_config 2611 .as_ref() 2612 .unwrap() 2613 .lock() 2614 .unwrap() 2615 .vsock 2616 .is_none()); 2617 2618 let result = vmm.vm_add_vsock(vsock_config.clone()); 2619 assert!(result.is_ok()); 2620 assert!(result.unwrap().is_none()); 2621 assert_eq!( 2622 vmm.vm_config 2623 .as_ref() 2624 .unwrap() 2625 .lock() 2626 .unwrap() 2627 .vsock 2628 .clone() 2629 .unwrap(), 2630 vsock_config 2631 ); 2632 } 2633 } 2634