1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use std::collections::HashMap; 12 use std::fs::File; 13 use std::io::{stdout, Read, Write}; 14 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 15 use std::os::unix::net::{UnixListener, UnixStream}; 16 use std::panic::AssertUnwindSafe; 17 use std::path::PathBuf; 18 use std::rc::Rc; 19 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 use std::{io, result, thread}; 23 24 use anyhow::anyhow; 25 #[cfg(feature = "dbus_api")] 26 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 27 use api::http::HttpApiHandle; 28 use console_devices::{pre_create_console_devices, ConsoleInfo}; 29 use landlock::LandlockError; 30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 31 use memory_manager::MemoryManagerSnapshotData; 32 use pci::PciBdf; 33 use seccompiler::{apply_filter, SeccompAction}; 34 use serde::ser::{SerializeStruct, Serializer}; 35 use serde::{Deserialize, Serialize}; 36 use signal_hook::iterator::{Handle, Signals}; 37 use thiserror::Error; 38 use tracer::trace_scoped; 39 use vm_memory::bitmap::AtomicBitmap; 40 use vm_memory::{ReadVolatile, WriteVolatile}; 41 use vm_migration::protocol::*; 42 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 43 use vmm_sys_util::eventfd::EventFd; 44 use vmm_sys_util::signal::unblock_signal; 45 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 46 47 use crate::api::{ 48 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 49 VmSendMigrationData, VmmPingResponse, 50 }; 51 use crate::config::{add_to_config, RestoreConfig}; 52 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 53 use crate::coredump::GuestDebuggable; 54 use crate::landlock::Landlock; 55 use crate::memory_manager::MemoryManager; 56 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 57 use crate::migration::get_vm_snapshot; 58 use crate::migration::{recv_vm_config, recv_vm_state}; 59 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 60 use crate::vm::{Error as VmError, Vm, VmState}; 61 use crate::vm_config::{ 62 DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, 63 VmConfig, VsockConfig, 64 }; 65 66 mod acpi; 67 pub mod api; 68 mod clone3; 69 pub mod config; 70 pub mod console_devices; 71 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 72 mod coredump; 73 pub mod cpu; 74 pub mod device_manager; 75 pub mod device_tree; 76 #[cfg(feature = "guest_debug")] 77 mod gdb; 78 #[cfg(feature = "igvm")] 79 mod igvm; 80 pub mod interrupt; 81 pub mod landlock; 82 pub mod memory_manager; 83 pub mod migration; 84 mod pci_segment; 85 pub mod seccomp_filters; 86 mod serial_manager; 87 mod sigwinch_listener; 88 pub mod vm; 89 pub mod vm_config; 90 91 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 92 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 93 94 /// Errors associated with VMM management 95 #[derive(Debug, Error)] 96 pub enum Error { 97 /// API request receive error 98 #[error("Error receiving API request: {0}")] 99 ApiRequestRecv(#[source] RecvError), 100 101 /// API response send error 102 #[error("Error sending API request: {0}")] 103 ApiResponseSend(#[source] SendError<ApiResponse>), 104 105 /// Cannot bind to the UNIX domain socket path 106 #[error("Error binding to UNIX domain socket: {0}")] 107 Bind(#[source] io::Error), 108 109 /// Cannot clone EventFd. 110 #[error("Error cloning EventFd: {0}")] 111 EventFdClone(#[source] io::Error), 112 113 /// Cannot create EventFd. 114 #[error("Error creating EventFd: {0}")] 115 EventFdCreate(#[source] io::Error), 116 117 /// Cannot read from EventFd. 118 #[error("Error reading from EventFd: {0}")] 119 EventFdRead(#[source] io::Error), 120 121 /// Cannot create epoll context. 122 #[error("Error creating epoll context: {0}")] 123 Epoll(#[source] io::Error), 124 125 /// Cannot create HTTP thread 126 #[error("Error spawning HTTP thread: {0}")] 127 HttpThreadSpawn(#[source] io::Error), 128 129 /// Cannot create D-Bus thread 130 #[cfg(feature = "dbus_api")] 131 #[error("Error spawning D-Bus thread: {0}")] 132 DBusThreadSpawn(#[source] io::Error), 133 134 /// Cannot start D-Bus session 135 #[cfg(feature = "dbus_api")] 136 #[error("Error starting D-Bus session: {0}")] 137 CreateDBusSession(#[source] zbus::Error), 138 139 /// Cannot create `event-monitor` thread 140 #[error("Error spawning `event-monitor` thread: {0}")] 141 EventMonitorThreadSpawn(#[source] io::Error), 142 143 /// Cannot handle the VM STDIN stream 144 #[error("Error handling VM stdin: {0:?}")] 145 Stdin(VmError), 146 147 /// Cannot handle the VM pty stream 148 #[error("Error handling VM pty: {0:?}")] 149 Pty(VmError), 150 151 /// Cannot reboot the VM 152 #[error("Error rebooting VM: {0:?}")] 153 VmReboot(VmError), 154 155 /// Cannot create VMM thread 156 #[error("Error spawning VMM thread {0:?}")] 157 VmmThreadSpawn(#[source] io::Error), 158 159 /// Cannot shut the VMM down 160 #[error("Error shutting down VMM: {0:?}")] 161 VmmShutdown(VmError), 162 163 /// Cannot create seccomp filter 164 #[error("Error creating seccomp filter: {0}")] 165 CreateSeccompFilter(seccompiler::Error), 166 167 /// Cannot apply seccomp filter 168 #[error("Error applying seccomp filter: {0}")] 169 ApplySeccompFilter(seccompiler::Error), 170 171 /// Error activating virtio devices 172 #[error("Error activating virtio devices: {0:?}")] 173 ActivateVirtioDevices(VmError), 174 175 /// Error creating API server 176 #[error("Error creating API server {0:?}")] 177 CreateApiServer(micro_http::ServerError), 178 179 /// Error binding API server socket 180 #[error("Error creation API server's socket {0:?}")] 181 CreateApiServerSocket(#[source] io::Error), 182 183 #[cfg(feature = "guest_debug")] 184 #[error("Failed to start the GDB thread: {0}")] 185 GdbThreadSpawn(io::Error), 186 187 /// GDB request receive error 188 #[cfg(feature = "guest_debug")] 189 #[error("Error receiving GDB request: {0}")] 190 GdbRequestRecv(#[source] RecvError), 191 192 /// GDB response send error 193 #[cfg(feature = "guest_debug")] 194 #[error("Error sending GDB request: {0}")] 195 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 196 197 #[error("Cannot spawn a signal handler thread: {0}")] 198 SignalHandlerSpawn(#[source] io::Error), 199 200 #[error("Failed to join on threads: {0:?}")] 201 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 202 203 /// Cannot create Landlock object 204 #[error("Error creating landlock object: {0}")] 205 CreateLandlock(LandlockError), 206 207 /// Cannot apply landlock based sandboxing 208 #[error("Error applying landlock: {0}")] 209 ApplyLandlock(LandlockError), 210 } 211 pub type Result<T> = result::Result<T, Error>; 212 213 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 214 #[repr(u64)] 215 pub enum EpollDispatch { 216 Exit = 0, 217 Reset = 1, 218 Api = 2, 219 ActivateVirtioDevices = 3, 220 Debug = 4, 221 Unknown, 222 } 223 224 impl From<u64> for EpollDispatch { 225 fn from(v: u64) -> Self { 226 use EpollDispatch::*; 227 match v { 228 0 => Exit, 229 1 => Reset, 230 2 => Api, 231 3 => ActivateVirtioDevices, 232 4 => Debug, 233 _ => Unknown, 234 } 235 } 236 } 237 238 pub struct EpollContext { 239 epoll_file: File, 240 } 241 242 impl EpollContext { 243 pub fn new() -> result::Result<EpollContext, io::Error> { 244 let epoll_fd = epoll::create(true)?; 245 // Use 'File' to enforce closing on 'epoll_fd' 246 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 247 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 248 249 Ok(EpollContext { epoll_file }) 250 } 251 252 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 253 where 254 T: AsRawFd, 255 { 256 let dispatch_index = token as u64; 257 epoll::ctl( 258 self.epoll_file.as_raw_fd(), 259 epoll::ControlOptions::EPOLL_CTL_ADD, 260 fd.as_raw_fd(), 261 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 262 )?; 263 264 Ok(()) 265 } 266 267 #[cfg(fuzzing)] 268 pub fn add_event_custom<T>( 269 &mut self, 270 fd: &T, 271 id: u64, 272 evts: epoll::Events, 273 ) -> result::Result<(), io::Error> 274 where 275 T: AsRawFd, 276 { 277 epoll::ctl( 278 self.epoll_file.as_raw_fd(), 279 epoll::ControlOptions::EPOLL_CTL_ADD, 280 fd.as_raw_fd(), 281 epoll::Event::new(evts, id), 282 )?; 283 284 Ok(()) 285 } 286 } 287 288 impl AsRawFd for EpollContext { 289 fn as_raw_fd(&self) -> RawFd { 290 self.epoll_file.as_raw_fd() 291 } 292 } 293 294 pub struct PciDeviceInfo { 295 pub id: String, 296 pub bdf: PciBdf, 297 } 298 299 impl Serialize for PciDeviceInfo { 300 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 301 where 302 S: Serializer, 303 { 304 let bdf_str = self.bdf.to_string(); 305 306 // Serialize the structure. 307 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 308 state.serialize_field("id", &self.id)?; 309 state.serialize_field("bdf", &bdf_str)?; 310 state.end() 311 } 312 } 313 314 pub fn feature_list() -> Vec<String> { 315 vec![ 316 #[cfg(feature = "dbus_api")] 317 "dbus_api".to_string(), 318 #[cfg(feature = "dhat-heap")] 319 "dhat-heap".to_string(), 320 #[cfg(feature = "guest_debug")] 321 "guest_debug".to_string(), 322 #[cfg(feature = "igvm")] 323 "igvm".to_string(), 324 #[cfg(feature = "io_uring")] 325 "io_uring".to_string(), 326 #[cfg(feature = "kvm")] 327 "kvm".to_string(), 328 #[cfg(feature = "mshv")] 329 "mshv".to_string(), 330 #[cfg(feature = "sev_snp")] 331 "sev_snp".to_string(), 332 #[cfg(feature = "tdx")] 333 "tdx".to_string(), 334 #[cfg(feature = "tracing")] 335 "tracing".to_string(), 336 ] 337 } 338 339 pub fn start_event_monitor_thread( 340 mut monitor: event_monitor::Monitor, 341 seccomp_action: &SeccompAction, 342 landlock_enable: bool, 343 hypervisor_type: hypervisor::HypervisorType, 344 exit_event: EventFd, 345 ) -> Result<thread::JoinHandle<Result<()>>> { 346 // Retrieve seccomp filter 347 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 348 .map_err(Error::CreateSeccompFilter)?; 349 350 thread::Builder::new() 351 .name("event-monitor".to_owned()) 352 .spawn(move || { 353 // Apply seccomp filter 354 if !seccomp_filter.is_empty() { 355 apply_filter(&seccomp_filter) 356 .map_err(Error::ApplySeccompFilter) 357 .map_err(|e| { 358 error!("Error applying seccomp filter: {:?}", e); 359 exit_event.write(1).ok(); 360 e 361 })?; 362 } 363 if landlock_enable { 364 Landlock::new() 365 .map_err(Error::CreateLandlock)? 366 .restrict_self() 367 .map_err(Error::ApplyLandlock) 368 .map_err(|e| { 369 error!("Error applying landlock to event monitor thread: {:?}", e); 370 exit_event.write(1).ok(); 371 e 372 })?; 373 } 374 375 std::panic::catch_unwind(AssertUnwindSafe(move || { 376 while let Ok(event) = monitor.rx.recv() { 377 let event = Arc::new(event); 378 379 if let Some(ref mut file) = monitor.file { 380 file.write_all(event.as_bytes().as_ref()).ok(); 381 file.write_all(b"\n\n").ok(); 382 } 383 384 for tx in monitor.broadcast.iter() { 385 tx.send(event.clone()).ok(); 386 } 387 } 388 })) 389 .map_err(|_| { 390 error!("`event-monitor` thread panicked"); 391 exit_event.write(1).ok(); 392 }) 393 .ok(); 394 395 Ok(()) 396 }) 397 .map_err(Error::EventMonitorThreadSpawn) 398 } 399 400 #[allow(unused_variables)] 401 #[allow(clippy::too_many_arguments)] 402 pub fn start_vmm_thread( 403 vmm_version: VmmVersionInfo, 404 http_path: &Option<String>, 405 http_fd: Option<RawFd>, 406 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 407 api_event: EventFd, 408 api_sender: Sender<ApiRequest>, 409 api_receiver: Receiver<ApiRequest>, 410 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 411 #[cfg(feature = "guest_debug")] debug_event: EventFd, 412 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 413 exit_event: EventFd, 414 seccomp_action: &SeccompAction, 415 hypervisor: Arc<dyn hypervisor::Hypervisor>, 416 landlock_enable: bool, 417 ) -> Result<VmmThreadHandle> { 418 #[cfg(feature = "guest_debug")] 419 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 420 #[cfg(feature = "guest_debug")] 421 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 422 #[cfg(feature = "guest_debug")] 423 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 424 #[cfg(feature = "guest_debug")] 425 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 426 427 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 428 let hypervisor_type = hypervisor.hypervisor_type(); 429 430 // Retrieve seccomp filter 431 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 432 .map_err(Error::CreateSeccompFilter)?; 433 434 let vmm_seccomp_action = seccomp_action.clone(); 435 let thread = { 436 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 437 thread::Builder::new() 438 .name("vmm".to_string()) 439 .spawn(move || { 440 // Apply seccomp filter for VMM thread. 441 if !vmm_seccomp_filter.is_empty() { 442 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 443 } 444 445 let mut vmm = Vmm::new( 446 vmm_version, 447 api_event, 448 #[cfg(feature = "guest_debug")] 449 debug_event, 450 #[cfg(feature = "guest_debug")] 451 vm_debug_event, 452 vmm_seccomp_action, 453 hypervisor, 454 exit_event, 455 )?; 456 457 vmm.setup_signal_handler(landlock_enable)?; 458 459 vmm.control_loop( 460 Rc::new(api_receiver), 461 #[cfg(feature = "guest_debug")] 462 Rc::new(gdb_receiver), 463 ) 464 }) 465 .map_err(Error::VmmThreadSpawn)? 466 }; 467 468 // The VMM thread is started, we can start the dbus thread 469 // and start serving HTTP requests 470 #[cfg(feature = "dbus_api")] 471 let dbus_shutdown_chs = match dbus_options { 472 Some(opts) => { 473 let (_, chs) = api::start_dbus_thread( 474 opts, 475 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 476 api_sender.clone(), 477 seccomp_action, 478 exit_event.try_clone().map_err(Error::EventFdClone)?, 479 hypervisor_type, 480 )?; 481 Some(chs) 482 } 483 None => None, 484 }; 485 486 let http_api_handle = if let Some(http_path) = http_path { 487 Some(api::start_http_path_thread( 488 http_path, 489 api_event_clone, 490 api_sender, 491 seccomp_action, 492 exit_event, 493 hypervisor_type, 494 landlock_enable, 495 )?) 496 } else if let Some(http_fd) = http_fd { 497 Some(api::start_http_fd_thread( 498 http_fd, 499 api_event_clone, 500 api_sender, 501 seccomp_action, 502 exit_event, 503 hypervisor_type, 504 landlock_enable, 505 )?) 506 } else { 507 None 508 }; 509 510 #[cfg(feature = "guest_debug")] 511 if let Some(debug_path) = debug_path { 512 let target = gdb::GdbStub::new( 513 gdb_sender, 514 gdb_debug_event, 515 gdb_vm_debug_event, 516 gdb_hw_breakpoints, 517 ); 518 thread::Builder::new() 519 .name("gdb".to_owned()) 520 .spawn(move || gdb::gdb_thread(target, &debug_path)) 521 .map_err(Error::GdbThreadSpawn)?; 522 } 523 524 Ok(VmmThreadHandle { 525 thread_handle: thread, 526 #[cfg(feature = "dbus_api")] 527 dbus_shutdown_chs, 528 http_api_handle, 529 }) 530 } 531 532 #[derive(Clone, Deserialize, Serialize)] 533 struct VmMigrationConfig { 534 vm_config: Arc<Mutex<VmConfig>>, 535 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 536 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 537 memory_manager_data: MemoryManagerSnapshotData, 538 } 539 540 #[derive(Debug, Clone)] 541 pub struct VmmVersionInfo { 542 pub build_version: String, 543 pub version: String, 544 } 545 546 impl VmmVersionInfo { 547 pub fn new(build_version: &str, version: &str) -> Self { 548 Self { 549 build_version: build_version.to_owned(), 550 version: version.to_owned(), 551 } 552 } 553 } 554 555 pub struct VmmThreadHandle { 556 pub thread_handle: thread::JoinHandle<Result<()>>, 557 #[cfg(feature = "dbus_api")] 558 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 559 pub http_api_handle: Option<HttpApiHandle>, 560 } 561 562 pub struct Vmm { 563 epoll: EpollContext, 564 exit_evt: EventFd, 565 reset_evt: EventFd, 566 api_evt: EventFd, 567 #[cfg(feature = "guest_debug")] 568 debug_evt: EventFd, 569 #[cfg(feature = "guest_debug")] 570 vm_debug_evt: EventFd, 571 version: VmmVersionInfo, 572 vm: Option<Vm>, 573 vm_config: Option<Arc<Mutex<VmConfig>>>, 574 seccomp_action: SeccompAction, 575 hypervisor: Arc<dyn hypervisor::Hypervisor>, 576 activate_evt: EventFd, 577 signals: Option<Handle>, 578 threads: Vec<thread::JoinHandle<()>>, 579 original_termios_opt: Arc<Mutex<Option<termios>>>, 580 console_resize_pipe: Option<Arc<File>>, 581 console_info: Option<ConsoleInfo>, 582 } 583 584 impl Vmm { 585 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 586 587 fn signal_handler( 588 mut signals: Signals, 589 original_termios_opt: Arc<Mutex<Option<termios>>>, 590 exit_evt: &EventFd, 591 ) { 592 for sig in &Self::HANDLED_SIGNALS { 593 unblock_signal(*sig).unwrap(); 594 } 595 596 for signal in signals.forever() { 597 match signal { 598 SIGTERM | SIGINT => { 599 if exit_evt.write(1).is_err() { 600 // Resetting the terminal is usually done as the VMM exits 601 if let Ok(lock) = original_termios_opt.lock() { 602 if let Some(termios) = *lock { 603 // SAFETY: FFI call 604 let _ = unsafe { 605 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 606 }; 607 } 608 } else { 609 warn!("Failed to lock original termios"); 610 } 611 612 std::process::exit(1); 613 } 614 } 615 _ => (), 616 } 617 } 618 } 619 620 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 621 let signals = Signals::new(Self::HANDLED_SIGNALS); 622 match signals { 623 Ok(signals) => { 624 self.signals = Some(signals.handle()); 625 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 626 let original_termios_opt = Arc::clone(&self.original_termios_opt); 627 628 let signal_handler_seccomp_filter = get_seccomp_filter( 629 &self.seccomp_action, 630 Thread::SignalHandler, 631 self.hypervisor.hypervisor_type(), 632 ) 633 .map_err(Error::CreateSeccompFilter)?; 634 self.threads.push( 635 thread::Builder::new() 636 .name("vmm_signal_handler".to_string()) 637 .spawn(move || { 638 if !signal_handler_seccomp_filter.is_empty() { 639 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 640 .map_err(Error::ApplySeccompFilter) 641 { 642 error!("Error applying seccomp filter: {:?}", e); 643 exit_evt.write(1).ok(); 644 return; 645 } 646 } 647 if landlock_enable{ 648 match Landlock::new() { 649 Ok(landlock) => { 650 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 651 error!("Error applying Landlock to signal handler thread: {:?}", e); 652 exit_evt.write(1).ok(); 653 }); 654 } 655 Err(e) => { 656 error!("Error creating Landlock object: {:?}", e); 657 exit_evt.write(1).ok(); 658 } 659 }; 660 } 661 662 std::panic::catch_unwind(AssertUnwindSafe(|| { 663 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 664 })) 665 .map_err(|_| { 666 error!("vmm signal_handler thread panicked"); 667 exit_evt.write(1).ok() 668 }) 669 .ok(); 670 }) 671 .map_err(Error::SignalHandlerSpawn)?, 672 ); 673 } 674 Err(e) => error!("Signal not found {}", e), 675 } 676 Ok(()) 677 } 678 679 #[allow(clippy::too_many_arguments)] 680 fn new( 681 vmm_version: VmmVersionInfo, 682 api_evt: EventFd, 683 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 684 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 685 seccomp_action: SeccompAction, 686 hypervisor: Arc<dyn hypervisor::Hypervisor>, 687 exit_evt: EventFd, 688 ) -> Result<Self> { 689 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 690 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 691 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 692 693 epoll 694 .add_event(&exit_evt, EpollDispatch::Exit) 695 .map_err(Error::Epoll)?; 696 697 epoll 698 .add_event(&reset_evt, EpollDispatch::Reset) 699 .map_err(Error::Epoll)?; 700 701 epoll 702 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 703 .map_err(Error::Epoll)?; 704 705 epoll 706 .add_event(&api_evt, EpollDispatch::Api) 707 .map_err(Error::Epoll)?; 708 709 #[cfg(feature = "guest_debug")] 710 epoll 711 .add_event(&debug_evt, EpollDispatch::Debug) 712 .map_err(Error::Epoll)?; 713 714 Ok(Vmm { 715 epoll, 716 exit_evt, 717 reset_evt, 718 api_evt, 719 #[cfg(feature = "guest_debug")] 720 debug_evt, 721 #[cfg(feature = "guest_debug")] 722 vm_debug_evt, 723 version: vmm_version, 724 vm: None, 725 vm_config: None, 726 seccomp_action, 727 hypervisor, 728 activate_evt, 729 signals: None, 730 threads: vec![], 731 original_termios_opt: Arc::new(Mutex::new(None)), 732 console_resize_pipe: None, 733 console_info: None, 734 }) 735 } 736 737 fn vm_receive_config<T>( 738 &mut self, 739 req: &Request, 740 socket: &mut T, 741 existing_memory_files: Option<HashMap<u32, File>>, 742 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 743 where 744 T: Read + Write, 745 { 746 // Read in config data along with memory manager data 747 let mut data: Vec<u8> = Vec::new(); 748 data.resize_with(req.length() as usize, Default::default); 749 socket 750 .read_exact(&mut data) 751 .map_err(MigratableError::MigrateSocket)?; 752 753 let vm_migration_config: VmMigrationConfig = 754 serde_json::from_slice(&data).map_err(|e| { 755 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 756 })?; 757 758 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 759 self.vm_check_cpuid_compatibility( 760 &vm_migration_config.vm_config, 761 &vm_migration_config.common_cpuid, 762 )?; 763 764 let config = vm_migration_config.vm_config.clone(); 765 self.vm_config = Some(vm_migration_config.vm_config); 766 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 767 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 768 })?); 769 770 if self 771 .vm_config 772 .as_ref() 773 .unwrap() 774 .lock() 775 .unwrap() 776 .landlock_enable 777 { 778 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 779 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 780 })?; 781 } 782 783 let vm = Vm::create_hypervisor_vm( 784 &self.hypervisor, 785 #[cfg(feature = "tdx")] 786 false, 787 #[cfg(feature = "sev_snp")] 788 false, 789 ) 790 .map_err(|e| { 791 MigratableError::MigrateReceive(anyhow!( 792 "Error creating hypervisor VM from snapshot: {:?}", 793 e 794 )) 795 })?; 796 797 let phys_bits = 798 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 799 800 let memory_manager = MemoryManager::new( 801 vm, 802 &config.lock().unwrap().memory.clone(), 803 None, 804 phys_bits, 805 #[cfg(feature = "tdx")] 806 false, 807 Some(&vm_migration_config.memory_manager_data), 808 existing_memory_files, 809 #[cfg(target_arch = "x86_64")] 810 None, 811 ) 812 .map_err(|e| { 813 MigratableError::MigrateReceive(anyhow!( 814 "Error creating MemoryManager from snapshot: {:?}", 815 e 816 )) 817 })?; 818 819 Response::ok().write_to(socket)?; 820 821 Ok(memory_manager) 822 } 823 824 fn vm_receive_state<T>( 825 &mut self, 826 req: &Request, 827 socket: &mut T, 828 mm: Arc<Mutex<MemoryManager>>, 829 ) -> std::result::Result<(), MigratableError> 830 where 831 T: Read + Write, 832 { 833 // Read in state data 834 let mut data: Vec<u8> = Vec::new(); 835 data.resize_with(req.length() as usize, Default::default); 836 socket 837 .read_exact(&mut data) 838 .map_err(MigratableError::MigrateSocket)?; 839 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 840 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 841 })?; 842 843 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 844 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 845 })?; 846 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 847 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 848 })?; 849 #[cfg(feature = "guest_debug")] 850 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 851 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 852 })?; 853 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 854 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 855 })?; 856 857 let timestamp = Instant::now(); 858 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 859 let mut vm = Vm::new_from_memory_manager( 860 self.vm_config.clone().unwrap(), 861 mm, 862 hypervisor_vm, 863 exit_evt, 864 reset_evt, 865 #[cfg(feature = "guest_debug")] 866 debug_evt, 867 &self.seccomp_action, 868 self.hypervisor.clone(), 869 activate_evt, 870 timestamp, 871 self.console_info.clone(), 872 self.console_resize_pipe.clone(), 873 Arc::clone(&self.original_termios_opt), 874 Some(snapshot), 875 ) 876 .map_err(|e| { 877 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 878 })?; 879 880 // Create VM 881 vm.restore().map_err(|e| { 882 Response::error().write_to(socket).ok(); 883 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 884 })?; 885 self.vm = Some(vm); 886 887 Response::ok().write_to(socket)?; 888 889 Ok(()) 890 } 891 892 fn vm_receive_memory<T>( 893 &mut self, 894 req: &Request, 895 socket: &mut T, 896 memory_manager: &mut MemoryManager, 897 ) -> std::result::Result<(), MigratableError> 898 where 899 T: Read + ReadVolatile + Write, 900 { 901 // Read table 902 let table = MemoryRangeTable::read_from(socket, req.length())?; 903 904 // And then read the memory itself 905 memory_manager 906 .receive_memory_regions(&table, socket) 907 .inspect_err(|_| { 908 Response::error().write_to(socket).ok(); 909 })?; 910 Response::ok().write_to(socket)?; 911 Ok(()) 912 } 913 914 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 915 url.strip_prefix("unix:") 916 .ok_or_else(|| { 917 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 918 }) 919 .map(|s| s.into()) 920 } 921 922 // Returns true if there were dirty pages to send 923 fn vm_maybe_send_dirty_pages<T>( 924 vm: &mut Vm, 925 socket: &mut T, 926 ) -> result::Result<bool, MigratableError> 927 where 928 T: Read + Write + WriteVolatile, 929 { 930 // Send (dirty) memory table 931 let table = vm.dirty_log()?; 932 933 // But if there are no regions go straight to pause 934 if table.regions().is_empty() { 935 return Ok(false); 936 } 937 938 Request::memory(table.length()).write_to(socket).unwrap(); 939 table.write_to(socket)?; 940 // And then the memory itself 941 vm.send_memory_regions(&table, socket)?; 942 Response::read_from(socket)?.ok_or_abandon( 943 socket, 944 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 945 )?; 946 947 Ok(true) 948 } 949 950 fn send_migration( 951 vm: &mut Vm, 952 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 953 dyn hypervisor::Hypervisor, 954 >, 955 send_data_migration: VmSendMigrationData, 956 ) -> result::Result<(), MigratableError> { 957 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 958 let mut socket = UnixStream::connect(path).map_err(|e| { 959 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 960 })?; 961 962 // Start the migration 963 Request::start().write_to(&mut socket)?; 964 Response::read_from(&mut socket)?.ok_or_abandon( 965 &mut socket, 966 MigratableError::MigrateSend(anyhow!("Error starting migration")), 967 )?; 968 969 // Send config 970 let vm_config = vm.get_config(); 971 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 972 let common_cpuid = { 973 #[cfg(feature = "tdx")] 974 if vm_config.lock().unwrap().is_tdx_enabled() { 975 return Err(MigratableError::MigrateSend(anyhow!( 976 "Live Migration is not supported when TDX is enabled" 977 ))); 978 }; 979 980 let amx = vm_config.lock().unwrap().cpus.features.amx; 981 let phys_bits = 982 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 983 arch::generate_common_cpuid( 984 &hypervisor, 985 &arch::CpuidConfig { 986 sgx_epc_sections: None, 987 phys_bits, 988 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 989 #[cfg(feature = "tdx")] 990 tdx: false, 991 amx, 992 }, 993 ) 994 .map_err(|e| { 995 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 996 })? 997 }; 998 999 if send_data_migration.local { 1000 vm.send_memory_fds(&mut socket)?; 1001 } 1002 1003 let vm_migration_config = VmMigrationConfig { 1004 vm_config, 1005 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1006 common_cpuid, 1007 memory_manager_data: vm.memory_manager_data(), 1008 }; 1009 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1010 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1011 socket 1012 .write_all(&config_data) 1013 .map_err(MigratableError::MigrateSocket)?; 1014 Response::read_from(&mut socket)?.ok_or_abandon( 1015 &mut socket, 1016 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1017 )?; 1018 1019 // Let every Migratable object know about the migration being started. 1020 vm.start_migration()?; 1021 1022 if send_data_migration.local { 1023 // Now pause VM 1024 vm.pause()?; 1025 } else { 1026 // Start logging dirty pages 1027 vm.start_dirty_log()?; 1028 1029 // Send memory table 1030 let table = vm.memory_range_table()?; 1031 Request::memory(table.length()) 1032 .write_to(&mut socket) 1033 .unwrap(); 1034 table.write_to(&mut socket)?; 1035 // And then the memory itself 1036 vm.send_memory_regions(&table, &mut socket)?; 1037 Response::read_from(&mut socket)?.ok_or_abandon( 1038 &mut socket, 1039 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1040 )?; 1041 1042 // Try at most 5 passes of dirty memory sending 1043 const MAX_DIRTY_MIGRATIONS: usize = 5; 1044 for i in 0..MAX_DIRTY_MIGRATIONS { 1045 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1046 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1047 break; 1048 } 1049 } 1050 1051 // Now pause VM 1052 vm.pause()?; 1053 1054 // Send last batch of dirty pages 1055 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1056 1057 // Stop logging dirty pages 1058 vm.stop_dirty_log()?; 1059 } 1060 // Capture snapshot and send it 1061 let vm_snapshot = vm.snapshot()?; 1062 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1063 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1064 socket 1065 .write_all(&snapshot_data) 1066 .map_err(MigratableError::MigrateSocket)?; 1067 Response::read_from(&mut socket)?.ok_or_abandon( 1068 &mut socket, 1069 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1070 )?; 1071 // Complete the migration 1072 Request::complete().write_to(&mut socket)?; 1073 Response::read_from(&mut socket)?.ok_or_abandon( 1074 &mut socket, 1075 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1076 )?; 1077 1078 info!("Migration complete"); 1079 1080 // Let every Migratable object know about the migration being complete 1081 vm.complete_migration() 1082 } 1083 1084 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1085 fn vm_check_cpuid_compatibility( 1086 &self, 1087 src_vm_config: &Arc<Mutex<VmConfig>>, 1088 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1089 ) -> result::Result<(), MigratableError> { 1090 #[cfg(feature = "tdx")] 1091 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1092 return Err(MigratableError::MigrateReceive(anyhow!( 1093 "Live Migration is not supported when TDX is enabled" 1094 ))); 1095 }; 1096 1097 // We check the `CPUID` compatibility of between the source vm and destination, which is 1098 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1099 let dest_cpuid = &{ 1100 let vm_config = &src_vm_config.lock().unwrap(); 1101 1102 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1103 arch::generate_common_cpuid( 1104 &self.hypervisor.clone(), 1105 &arch::CpuidConfig { 1106 sgx_epc_sections: None, 1107 phys_bits, 1108 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1109 #[cfg(feature = "tdx")] 1110 tdx: false, 1111 amx: vm_config.cpus.features.amx, 1112 }, 1113 ) 1114 .map_err(|e| { 1115 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1116 })? 1117 }; 1118 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1119 MigratableError::MigrateReceive(anyhow!( 1120 "Error checking cpu feature compatibility': {:?}", 1121 e 1122 )) 1123 }) 1124 } 1125 1126 fn control_loop( 1127 &mut self, 1128 api_receiver: Rc<Receiver<ApiRequest>>, 1129 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1130 ) -> Result<()> { 1131 const EPOLL_EVENTS_LEN: usize = 100; 1132 1133 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1134 let epoll_fd = self.epoll.as_raw_fd(); 1135 1136 'outer: loop { 1137 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1138 Ok(res) => res, 1139 Err(e) => { 1140 if e.kind() == io::ErrorKind::Interrupted { 1141 // It's well defined from the epoll_wait() syscall 1142 // documentation that the epoll loop can be interrupted 1143 // before any of the requested events occurred or the 1144 // timeout expired. In both those cases, epoll_wait() 1145 // returns an error of type EINTR, but this should not 1146 // be considered as a regular error. Instead it is more 1147 // appropriate to retry, by calling into epoll_wait(). 1148 continue; 1149 } 1150 return Err(Error::Epoll(e)); 1151 } 1152 }; 1153 1154 for event in events.iter().take(num_events) { 1155 let dispatch_event: EpollDispatch = event.data.into(); 1156 match dispatch_event { 1157 EpollDispatch::Unknown => { 1158 let event = event.data; 1159 warn!("Unknown VMM loop event: {}", event); 1160 } 1161 EpollDispatch::Exit => { 1162 info!("VM exit event"); 1163 // Consume the event. 1164 self.exit_evt.read().map_err(Error::EventFdRead)?; 1165 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1166 1167 break 'outer; 1168 } 1169 EpollDispatch::Reset => { 1170 info!("VM reset event"); 1171 // Consume the event. 1172 self.reset_evt.read().map_err(Error::EventFdRead)?; 1173 self.vm_reboot().map_err(Error::VmReboot)?; 1174 } 1175 EpollDispatch::ActivateVirtioDevices => { 1176 if let Some(ref vm) = self.vm { 1177 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1178 info!( 1179 "Trying to activate pending virtio devices: count = {}", 1180 count 1181 ); 1182 vm.activate_virtio_devices() 1183 .map_err(Error::ActivateVirtioDevices)?; 1184 } 1185 } 1186 EpollDispatch::Api => { 1187 // Consume the events. 1188 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1189 // Read from the API receiver channel 1190 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1191 1192 if api_request(self)? { 1193 break 'outer; 1194 } 1195 } 1196 } 1197 #[cfg(feature = "guest_debug")] 1198 EpollDispatch::Debug => { 1199 // Consume the events. 1200 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1201 // Read from the API receiver channel 1202 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1203 1204 let response = if let Some(ref mut vm) = self.vm { 1205 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1206 } else { 1207 Err(VmError::VmNotRunning) 1208 } 1209 .map_err(gdb::Error::Vm); 1210 1211 gdb_request 1212 .sender 1213 .send(response) 1214 .map_err(Error::GdbResponseSend)?; 1215 } 1216 } 1217 #[cfg(not(feature = "guest_debug"))] 1218 EpollDispatch::Debug => {} 1219 } 1220 } 1221 } 1222 1223 // Trigger the termination of the signal_handler thread 1224 if let Some(signals) = self.signals.take() { 1225 signals.close(); 1226 } 1227 1228 // Wait for all the threads to finish 1229 for thread in self.threads.drain(..) { 1230 thread.join().map_err(Error::ThreadCleanup)? 1231 } 1232 1233 Ok(()) 1234 } 1235 } 1236 1237 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1238 vm_config.lock().unwrap().apply_landlock()?; 1239 Ok(()) 1240 } 1241 1242 impl RequestHandler for Vmm { 1243 fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> { 1244 // We only store the passed VM config. 1245 // The VM will be created when being asked to boot it. 1246 if self.vm_config.is_none() { 1247 self.vm_config = Some(Arc::new(Mutex::new(*config))); 1248 self.console_info = 1249 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1250 1251 if self 1252 .vm_config 1253 .as_ref() 1254 .unwrap() 1255 .lock() 1256 .unwrap() 1257 .landlock_enable 1258 { 1259 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1260 .map_err(VmError::ApplyLandlock)?; 1261 } 1262 Ok(()) 1263 } else { 1264 Err(VmError::VmAlreadyCreated) 1265 } 1266 } 1267 1268 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1269 tracer::start(); 1270 info!("Booting VM"); 1271 event!("vm", "booting"); 1272 let r = { 1273 trace_scoped!("vm_boot"); 1274 // If we don't have a config, we cannot boot a VM. 1275 if self.vm_config.is_none() { 1276 return Err(VmError::VmMissingConfig); 1277 }; 1278 1279 // console_info is set to None in vm_shutdown. re-populate here if empty 1280 if self.console_info.is_none() { 1281 self.console_info = 1282 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1283 } 1284 1285 // Create a new VM if we don't have one yet. 1286 if self.vm.is_none() { 1287 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1288 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1289 #[cfg(feature = "guest_debug")] 1290 let vm_debug_evt = self 1291 .vm_debug_evt 1292 .try_clone() 1293 .map_err(VmError::EventFdClone)?; 1294 let activate_evt = self 1295 .activate_evt 1296 .try_clone() 1297 .map_err(VmError::EventFdClone)?; 1298 1299 if let Some(ref vm_config) = self.vm_config { 1300 let vm = Vm::new( 1301 Arc::clone(vm_config), 1302 exit_evt, 1303 reset_evt, 1304 #[cfg(feature = "guest_debug")] 1305 vm_debug_evt, 1306 &self.seccomp_action, 1307 self.hypervisor.clone(), 1308 activate_evt, 1309 self.console_info.clone(), 1310 self.console_resize_pipe.clone(), 1311 Arc::clone(&self.original_termios_opt), 1312 None, 1313 None, 1314 None, 1315 )?; 1316 1317 self.vm = Some(vm); 1318 } 1319 } 1320 1321 // Now we can boot the VM. 1322 if let Some(ref mut vm) = self.vm { 1323 vm.boot() 1324 } else { 1325 Err(VmError::VmNotCreated) 1326 } 1327 }; 1328 tracer::end(); 1329 if r.is_ok() { 1330 event!("vm", "booted"); 1331 } 1332 r 1333 } 1334 1335 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1336 if let Some(ref mut vm) = self.vm { 1337 vm.pause().map_err(VmError::Pause) 1338 } else { 1339 Err(VmError::VmNotRunning) 1340 } 1341 } 1342 1343 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1344 if let Some(ref mut vm) = self.vm { 1345 vm.resume().map_err(VmError::Resume) 1346 } else { 1347 Err(VmError::VmNotRunning) 1348 } 1349 } 1350 1351 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1352 if let Some(ref mut vm) = self.vm { 1353 // Drain console_info so that FDs are not reused 1354 let _ = self.console_info.take(); 1355 vm.snapshot() 1356 .map_err(VmError::Snapshot) 1357 .and_then(|snapshot| { 1358 vm.send(&snapshot, destination_url) 1359 .map_err(VmError::SnapshotSend) 1360 }) 1361 } else { 1362 Err(VmError::VmNotRunning) 1363 } 1364 } 1365 1366 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1367 if self.vm.is_some() || self.vm_config.is_some() { 1368 return Err(VmError::VmAlreadyCreated); 1369 } 1370 1371 let source_url = restore_cfg.source_url.as_path().to_str(); 1372 if source_url.is_none() { 1373 return Err(VmError::InvalidRestoreSourceUrl); 1374 } 1375 // Safe to unwrap as we checked it was Some(&str). 1376 let source_url = source_url.unwrap(); 1377 1378 let vm_config = Arc::new(Mutex::new( 1379 recv_vm_config(source_url).map_err(VmError::Restore)?, 1380 )); 1381 restore_cfg 1382 .validate(&vm_config.lock().unwrap().clone()) 1383 .map_err(VmError::ConfigValidation)?; 1384 1385 // Update VM's net configurations with new fds received for restore operation 1386 if let (Some(restored_nets), Some(vm_net_configs)) = 1387 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1388 { 1389 for net in restored_nets.iter() { 1390 for net_config in vm_net_configs.iter_mut() { 1391 // update only if the net dev is backed by FDs 1392 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1393 net_config.fds.clone_from(&net.fds); 1394 } 1395 } 1396 } 1397 } 1398 1399 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1400 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1401 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1402 1403 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1404 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1405 .map_err(VmError::Restore)?; 1406 1407 self.vm_config = Some(Arc::clone(&vm_config)); 1408 1409 // console_info is set to None in vm_snapshot. re-populate here if empty 1410 if self.console_info.is_none() { 1411 self.console_info = 1412 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1413 } 1414 1415 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1416 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1417 #[cfg(feature = "guest_debug")] 1418 let debug_evt = self 1419 .vm_debug_evt 1420 .try_clone() 1421 .map_err(VmError::EventFdClone)?; 1422 let activate_evt = self 1423 .activate_evt 1424 .try_clone() 1425 .map_err(VmError::EventFdClone)?; 1426 1427 let vm = Vm::new( 1428 vm_config, 1429 exit_evt, 1430 reset_evt, 1431 #[cfg(feature = "guest_debug")] 1432 debug_evt, 1433 &self.seccomp_action, 1434 self.hypervisor.clone(), 1435 activate_evt, 1436 self.console_info.clone(), 1437 self.console_resize_pipe.clone(), 1438 Arc::clone(&self.original_termios_opt), 1439 Some(snapshot), 1440 Some(source_url), 1441 Some(restore_cfg.prefault), 1442 )?; 1443 self.vm = Some(vm); 1444 1445 if self 1446 .vm_config 1447 .as_ref() 1448 .unwrap() 1449 .lock() 1450 .unwrap() 1451 .landlock_enable 1452 { 1453 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1454 .map_err(VmError::ApplyLandlock)?; 1455 } 1456 1457 // Now we can restore the rest of the VM. 1458 if let Some(ref mut vm) = self.vm { 1459 vm.restore() 1460 } else { 1461 Err(VmError::VmNotCreated) 1462 } 1463 } 1464 1465 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1466 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1467 if let Some(ref mut vm) = self.vm { 1468 vm.coredump(destination_url).map_err(VmError::Coredump) 1469 } else { 1470 Err(VmError::VmNotRunning) 1471 } 1472 } 1473 1474 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1475 let r = if let Some(ref mut vm) = self.vm.take() { 1476 // Drain console_info so that the FDs are not reused 1477 let _ = self.console_info.take(); 1478 vm.shutdown() 1479 } else { 1480 Err(VmError::VmNotRunning) 1481 }; 1482 1483 if r.is_ok() { 1484 event!("vm", "shutdown"); 1485 } 1486 1487 r 1488 } 1489 1490 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1491 event!("vm", "rebooting"); 1492 1493 // First we stop the current VM 1494 let config = if let Some(mut vm) = self.vm.take() { 1495 let config = vm.get_config(); 1496 vm.shutdown()?; 1497 config 1498 } else { 1499 return Err(VmError::VmNotCreated); 1500 }; 1501 1502 // vm.shutdown() closes all the console devices, so set console_info to None 1503 // so that the closed FD #s are not reused. 1504 let _ = self.console_info.take(); 1505 1506 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1507 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1508 #[cfg(feature = "guest_debug")] 1509 let debug_evt = self 1510 .vm_debug_evt 1511 .try_clone() 1512 .map_err(VmError::EventFdClone)?; 1513 let activate_evt = self 1514 .activate_evt 1515 .try_clone() 1516 .map_err(VmError::EventFdClone)?; 1517 1518 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1519 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1520 // during the boot process. 1521 if self.reset_evt.read().is_ok() { 1522 warn!("Spurious second reset event received. Ignoring."); 1523 } 1524 1525 self.console_info = 1526 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1527 1528 // Then we create the new VM 1529 let mut vm = Vm::new( 1530 config, 1531 exit_evt, 1532 reset_evt, 1533 #[cfg(feature = "guest_debug")] 1534 debug_evt, 1535 &self.seccomp_action, 1536 self.hypervisor.clone(), 1537 activate_evt, 1538 self.console_info.clone(), 1539 self.console_resize_pipe.clone(), 1540 Arc::clone(&self.original_termios_opt), 1541 None, 1542 None, 1543 None, 1544 )?; 1545 1546 // And we boot it 1547 vm.boot()?; 1548 1549 self.vm = Some(vm); 1550 1551 event!("vm", "rebooted"); 1552 1553 Ok(()) 1554 } 1555 1556 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1557 match &self.vm_config { 1558 Some(vm_config) => { 1559 let state = match &self.vm { 1560 Some(vm) => vm.get_state()?, 1561 None => VmState::Created, 1562 }; 1563 let config = vm_config.lock().unwrap().clone(); 1564 1565 let mut memory_actual_size = config.memory.total_size(); 1566 if let Some(vm) = &self.vm { 1567 memory_actual_size -= vm.balloon_size(); 1568 } 1569 1570 let device_tree = self 1571 .vm 1572 .as_ref() 1573 .map(|vm| vm.device_tree().lock().unwrap().clone()); 1574 1575 Ok(VmInfoResponse { 1576 config: Box::new(config), 1577 state, 1578 memory_actual_size, 1579 device_tree, 1580 }) 1581 } 1582 None => Err(VmError::VmNotCreated), 1583 } 1584 } 1585 1586 fn vmm_ping(&self) -> VmmPingResponse { 1587 let VmmVersionInfo { 1588 build_version, 1589 version, 1590 } = self.version.clone(); 1591 1592 VmmPingResponse { 1593 build_version, 1594 version, 1595 pid: std::process::id() as i64, 1596 features: feature_list(), 1597 } 1598 } 1599 1600 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1601 if self.vm_config.is_none() { 1602 return Ok(()); 1603 } 1604 1605 // If a VM is booted, we first try to shut it down. 1606 if self.vm.is_some() { 1607 self.vm_shutdown()?; 1608 } 1609 1610 self.vm_config = None; 1611 1612 event!("vm", "deleted"); 1613 1614 Ok(()) 1615 } 1616 1617 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1618 self.vm_delete()?; 1619 event!("vmm", "shutdown"); 1620 Ok(()) 1621 } 1622 1623 fn vm_resize( 1624 &mut self, 1625 desired_vcpus: Option<u8>, 1626 desired_ram: Option<u64>, 1627 desired_balloon: Option<u64>, 1628 ) -> result::Result<(), VmError> { 1629 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1630 1631 if let Some(ref mut vm) = self.vm { 1632 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1633 error!("Error when resizing VM: {:?}", e); 1634 Err(e) 1635 } else { 1636 Ok(()) 1637 } 1638 } else { 1639 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1640 if let Some(desired_vcpus) = desired_vcpus { 1641 config.cpus.boot_vcpus = desired_vcpus; 1642 } 1643 if let Some(desired_ram) = desired_ram { 1644 config.memory.size = desired_ram; 1645 } 1646 if let Some(desired_balloon) = desired_balloon { 1647 if let Some(balloon_config) = &mut config.balloon { 1648 balloon_config.size = desired_balloon; 1649 } 1650 } 1651 Ok(()) 1652 } 1653 } 1654 1655 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1656 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1657 1658 if let Some(ref mut vm) = self.vm { 1659 if let Err(e) = vm.resize_zone(id, desired_ram) { 1660 error!("Error when resizing VM: {:?}", e); 1661 Err(e) 1662 } else { 1663 Ok(()) 1664 } 1665 } else { 1666 // Update VmConfig by setting the new desired ram. 1667 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1668 1669 if let Some(zones) = &mut memory_config.zones { 1670 for zone in zones.iter_mut() { 1671 if zone.id == id { 1672 zone.size = desired_ram; 1673 return Ok(()); 1674 } 1675 } 1676 } 1677 1678 error!("Could not find the memory zone {} for the resize", id); 1679 Err(VmError::ResizeZone) 1680 } 1681 } 1682 1683 fn vm_add_device( 1684 &mut self, 1685 device_cfg: DeviceConfig, 1686 ) -> result::Result<Option<Vec<u8>>, VmError> { 1687 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1688 1689 { 1690 // Validate the configuration change in a cloned configuration 1691 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1692 add_to_config(&mut config.devices, device_cfg.clone()); 1693 config.validate().map_err(VmError::ConfigValidation)?; 1694 } 1695 1696 if let Some(ref mut vm) = self.vm { 1697 let info = vm.add_device(device_cfg).map_err(|e| { 1698 error!("Error when adding new device to the VM: {:?}", e); 1699 e 1700 })?; 1701 serde_json::to_vec(&info) 1702 .map(Some) 1703 .map_err(VmError::SerializeJson) 1704 } else { 1705 // Update VmConfig by adding the new device. 1706 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1707 add_to_config(&mut config.devices, device_cfg); 1708 Ok(None) 1709 } 1710 } 1711 1712 fn vm_add_user_device( 1713 &mut self, 1714 device_cfg: UserDeviceConfig, 1715 ) -> result::Result<Option<Vec<u8>>, VmError> { 1716 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1717 1718 { 1719 // Validate the configuration change in a cloned configuration 1720 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1721 add_to_config(&mut config.user_devices, device_cfg.clone()); 1722 config.validate().map_err(VmError::ConfigValidation)?; 1723 } 1724 1725 if let Some(ref mut vm) = self.vm { 1726 let info = vm.add_user_device(device_cfg).map_err(|e| { 1727 error!("Error when adding new user device to the VM: {:?}", e); 1728 e 1729 })?; 1730 serde_json::to_vec(&info) 1731 .map(Some) 1732 .map_err(VmError::SerializeJson) 1733 } else { 1734 // Update VmConfig by adding the new device. 1735 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1736 add_to_config(&mut config.user_devices, device_cfg); 1737 Ok(None) 1738 } 1739 } 1740 1741 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1742 if let Some(ref mut vm) = self.vm { 1743 if let Err(e) = vm.remove_device(id) { 1744 error!("Error when removing device from the VM: {:?}", e); 1745 Err(e) 1746 } else { 1747 Ok(()) 1748 } 1749 } else if let Some(ref config) = self.vm_config { 1750 let mut config = config.lock().unwrap(); 1751 if config.remove_device(&id) { 1752 Ok(()) 1753 } else { 1754 Err(VmError::NoDeviceToRemove(id)) 1755 } 1756 } else { 1757 Err(VmError::VmNotCreated) 1758 } 1759 } 1760 1761 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1762 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1763 1764 { 1765 // Validate the configuration change in a cloned configuration 1766 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1767 add_to_config(&mut config.disks, disk_cfg.clone()); 1768 config.validate().map_err(VmError::ConfigValidation)?; 1769 } 1770 1771 if let Some(ref mut vm) = self.vm { 1772 let info = vm.add_disk(disk_cfg).map_err(|e| { 1773 error!("Error when adding new disk to the VM: {:?}", e); 1774 e 1775 })?; 1776 serde_json::to_vec(&info) 1777 .map(Some) 1778 .map_err(VmError::SerializeJson) 1779 } else { 1780 // Update VmConfig by adding the new device. 1781 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1782 add_to_config(&mut config.disks, disk_cfg); 1783 Ok(None) 1784 } 1785 } 1786 1787 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1788 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1789 1790 { 1791 // Validate the configuration change in a cloned configuration 1792 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1793 add_to_config(&mut config.fs, fs_cfg.clone()); 1794 config.validate().map_err(VmError::ConfigValidation)?; 1795 } 1796 1797 if let Some(ref mut vm) = self.vm { 1798 let info = vm.add_fs(fs_cfg).map_err(|e| { 1799 error!("Error when adding new fs to the VM: {:?}", e); 1800 e 1801 })?; 1802 serde_json::to_vec(&info) 1803 .map(Some) 1804 .map_err(VmError::SerializeJson) 1805 } else { 1806 // Update VmConfig by adding the new device. 1807 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1808 add_to_config(&mut config.fs, fs_cfg); 1809 Ok(None) 1810 } 1811 } 1812 1813 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1814 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1815 1816 { 1817 // Validate the configuration change in a cloned configuration 1818 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1819 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1820 config.validate().map_err(VmError::ConfigValidation)?; 1821 } 1822 1823 if let Some(ref mut vm) = self.vm { 1824 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1825 error!("Error when adding new pmem device to the VM: {:?}", e); 1826 e 1827 })?; 1828 serde_json::to_vec(&info) 1829 .map(Some) 1830 .map_err(VmError::SerializeJson) 1831 } else { 1832 // Update VmConfig by adding the new device. 1833 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1834 add_to_config(&mut config.pmem, pmem_cfg); 1835 Ok(None) 1836 } 1837 } 1838 1839 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1840 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1841 1842 { 1843 // Validate the configuration change in a cloned configuration 1844 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1845 add_to_config(&mut config.net, net_cfg.clone()); 1846 config.validate().map_err(VmError::ConfigValidation)?; 1847 } 1848 1849 if let Some(ref mut vm) = self.vm { 1850 let info = vm.add_net(net_cfg).map_err(|e| { 1851 error!("Error when adding new network device to the VM: {:?}", e); 1852 e 1853 })?; 1854 serde_json::to_vec(&info) 1855 .map(Some) 1856 .map_err(VmError::SerializeJson) 1857 } else { 1858 // Update VmConfig by adding the new device. 1859 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1860 add_to_config(&mut config.net, net_cfg); 1861 Ok(None) 1862 } 1863 } 1864 1865 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1866 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1867 1868 { 1869 // Validate the configuration change in a cloned configuration 1870 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1871 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1872 config.validate().map_err(VmError::ConfigValidation)?; 1873 } 1874 1875 if let Some(ref mut vm) = self.vm { 1876 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1877 error!("Error when adding new vDPA device to the VM: {:?}", e); 1878 e 1879 })?; 1880 serde_json::to_vec(&info) 1881 .map(Some) 1882 .map_err(VmError::SerializeJson) 1883 } else { 1884 // Update VmConfig by adding the new device. 1885 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1886 add_to_config(&mut config.vdpa, vdpa_cfg); 1887 Ok(None) 1888 } 1889 } 1890 1891 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1892 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1893 1894 { 1895 // Validate the configuration change in a cloned configuration 1896 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1897 1898 if config.vsock.is_some() { 1899 return Err(VmError::TooManyVsockDevices); 1900 } 1901 1902 config.vsock = Some(vsock_cfg.clone()); 1903 config.validate().map_err(VmError::ConfigValidation)?; 1904 } 1905 1906 if let Some(ref mut vm) = self.vm { 1907 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1908 error!("Error when adding new vsock device to the VM: {:?}", e); 1909 e 1910 })?; 1911 serde_json::to_vec(&info) 1912 .map(Some) 1913 .map_err(VmError::SerializeJson) 1914 } else { 1915 // Update VmConfig by adding the new device. 1916 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1917 config.vsock = Some(vsock_cfg); 1918 Ok(None) 1919 } 1920 } 1921 1922 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1923 if let Some(ref mut vm) = self.vm { 1924 let info = vm.counters().map_err(|e| { 1925 error!("Error when getting counters from the VM: {:?}", e); 1926 e 1927 })?; 1928 serde_json::to_vec(&info) 1929 .map(Some) 1930 .map_err(VmError::SerializeJson) 1931 } else { 1932 Err(VmError::VmNotRunning) 1933 } 1934 } 1935 1936 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1937 if let Some(ref mut vm) = self.vm { 1938 vm.power_button() 1939 } else { 1940 Err(VmError::VmNotRunning) 1941 } 1942 } 1943 1944 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 1945 if let Some(ref mut vm) = self.vm { 1946 vm.nmi() 1947 } else { 1948 Err(VmError::VmNotRunning) 1949 } 1950 } 1951 1952 fn vm_receive_migration( 1953 &mut self, 1954 receive_data_migration: VmReceiveMigrationData, 1955 ) -> result::Result<(), MigratableError> { 1956 info!( 1957 "Receiving migration: receiver_url = {}", 1958 receive_data_migration.receiver_url 1959 ); 1960 1961 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1962 let listener = UnixListener::bind(&path).map_err(|e| { 1963 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1964 })?; 1965 let (mut socket, _addr) = listener.accept().map_err(|e| { 1966 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1967 })?; 1968 std::fs::remove_file(&path).map_err(|e| { 1969 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1970 })?; 1971 1972 let mut started = false; 1973 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1974 let mut existing_memory_files = None; 1975 loop { 1976 let req = Request::read_from(&mut socket)?; 1977 match req.command() { 1978 Command::Invalid => info!("Invalid Command Received"), 1979 Command::Start => { 1980 info!("Start Command Received"); 1981 started = true; 1982 1983 Response::ok().write_to(&mut socket)?; 1984 } 1985 Command::Config => { 1986 info!("Config Command Received"); 1987 1988 if !started { 1989 warn!("Migration not started yet"); 1990 Response::error().write_to(&mut socket)?; 1991 continue; 1992 } 1993 memory_manager = Some(self.vm_receive_config( 1994 &req, 1995 &mut socket, 1996 existing_memory_files.take(), 1997 )?); 1998 } 1999 Command::State => { 2000 info!("State Command Received"); 2001 2002 if !started { 2003 warn!("Migration not started yet"); 2004 Response::error().write_to(&mut socket)?; 2005 continue; 2006 } 2007 if let Some(mm) = memory_manager.take() { 2008 self.vm_receive_state(&req, &mut socket, mm)?; 2009 } else { 2010 warn!("Configuration not sent yet"); 2011 Response::error().write_to(&mut socket)?; 2012 } 2013 } 2014 Command::Memory => { 2015 info!("Memory Command Received"); 2016 2017 if !started { 2018 warn!("Migration not started yet"); 2019 Response::error().write_to(&mut socket)?; 2020 continue; 2021 } 2022 if let Some(mm) = memory_manager.as_ref() { 2023 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2024 } else { 2025 warn!("Configuration not sent yet"); 2026 Response::error().write_to(&mut socket)?; 2027 } 2028 } 2029 Command::MemoryFd => { 2030 info!("MemoryFd Command Received"); 2031 2032 if !started { 2033 warn!("Migration not started yet"); 2034 Response::error().write_to(&mut socket)?; 2035 continue; 2036 } 2037 2038 let mut buf = [0u8; 4]; 2039 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 2040 MigratableError::MigrateReceive(anyhow!( 2041 "Error receiving slot from socket: {}", 2042 e 2043 )) 2044 })?; 2045 2046 if existing_memory_files.is_none() { 2047 existing_memory_files = Some(HashMap::default()) 2048 } 2049 2050 if let Some(ref mut existing_memory_files) = existing_memory_files { 2051 let slot = u32::from_le_bytes(buf); 2052 existing_memory_files.insert(slot, file.unwrap()); 2053 } 2054 2055 Response::ok().write_to(&mut socket)?; 2056 } 2057 Command::Complete => { 2058 info!("Complete Command Received"); 2059 if let Some(ref mut vm) = self.vm.as_mut() { 2060 vm.resume()?; 2061 Response::ok().write_to(&mut socket)?; 2062 } else { 2063 warn!("VM not created yet"); 2064 Response::error().write_to(&mut socket)?; 2065 } 2066 break; 2067 } 2068 Command::Abandon => { 2069 info!("Abandon Command Received"); 2070 self.vm = None; 2071 self.vm_config = None; 2072 Response::ok().write_to(&mut socket).ok(); 2073 break; 2074 } 2075 } 2076 } 2077 2078 Ok(()) 2079 } 2080 2081 fn vm_send_migration( 2082 &mut self, 2083 send_data_migration: VmSendMigrationData, 2084 ) -> result::Result<(), MigratableError> { 2085 info!( 2086 "Sending migration: destination_url = {}, local = {}", 2087 send_data_migration.destination_url, send_data_migration.local 2088 ); 2089 2090 if !self 2091 .vm_config 2092 .as_ref() 2093 .unwrap() 2094 .lock() 2095 .unwrap() 2096 .backed_by_shared_memory() 2097 && send_data_migration.local 2098 { 2099 return Err(MigratableError::MigrateSend(anyhow!( 2100 "Local migration requires shared memory or hugepages enabled" 2101 ))); 2102 } 2103 2104 if let Some(vm) = self.vm.as_mut() { 2105 Self::send_migration( 2106 vm, 2107 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2108 self.hypervisor.clone(), 2109 send_data_migration, 2110 ) 2111 .map_err(|migration_err| { 2112 error!("Migration failed: {:?}", migration_err); 2113 2114 // Stop logging dirty pages 2115 if let Err(e) = vm.stop_dirty_log() { 2116 return e; 2117 } 2118 2119 if vm.get_state().unwrap() == VmState::Paused { 2120 if let Err(e) = vm.resume() { 2121 return e; 2122 } 2123 } 2124 2125 migration_err 2126 })?; 2127 2128 // Shutdown the VM after the migration succeeded 2129 self.exit_evt.write(1).map_err(|e| { 2130 MigratableError::MigrateSend(anyhow!( 2131 "Failed shutting down the VM after migration: {:?}", 2132 e 2133 )) 2134 }) 2135 } else { 2136 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2137 } 2138 } 2139 } 2140 2141 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2142 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2143 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2144 2145 #[cfg(test)] 2146 mod unit_tests { 2147 use super::*; 2148 #[cfg(target_arch = "x86_64")] 2149 use crate::vm_config::DebugConsoleConfig; 2150 use crate::vm_config::{ 2151 ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig, 2152 PayloadConfig, RngConfig, 2153 }; 2154 2155 fn create_dummy_vmm() -> Vmm { 2156 Vmm::new( 2157 VmmVersionInfo::new("dummy", "dummy"), 2158 EventFd::new(EFD_NONBLOCK).unwrap(), 2159 #[cfg(feature = "guest_debug")] 2160 EventFd::new(EFD_NONBLOCK).unwrap(), 2161 #[cfg(feature = "guest_debug")] 2162 EventFd::new(EFD_NONBLOCK).unwrap(), 2163 SeccompAction::Allow, 2164 hypervisor::new().unwrap(), 2165 EventFd::new(EFD_NONBLOCK).unwrap(), 2166 ) 2167 .unwrap() 2168 } 2169 2170 fn create_dummy_vm_config() -> Box<VmConfig> { 2171 Box::new(VmConfig { 2172 cpus: CpusConfig { 2173 boot_vcpus: 1, 2174 max_vcpus: 1, 2175 topology: None, 2176 kvm_hyperv: false, 2177 max_phys_bits: 46, 2178 affinity: None, 2179 features: CpuFeatures::default(), 2180 }, 2181 memory: MemoryConfig { 2182 size: 536_870_912, 2183 mergeable: false, 2184 hotplug_method: HotplugMethod::Acpi, 2185 hotplug_size: None, 2186 hotplugged_size: None, 2187 shared: true, 2188 hugepages: false, 2189 hugepage_size: None, 2190 prefault: false, 2191 zones: None, 2192 thp: true, 2193 }, 2194 payload: Some(PayloadConfig { 2195 kernel: Some(PathBuf::from("/path/to/kernel")), 2196 firmware: None, 2197 cmdline: None, 2198 initramfs: None, 2199 #[cfg(feature = "igvm")] 2200 igvm: None, 2201 #[cfg(feature = "sev_snp")] 2202 host_data: None, 2203 }), 2204 rate_limit_groups: None, 2205 disks: None, 2206 net: None, 2207 rng: RngConfig { 2208 src: PathBuf::from("/dev/urandom"), 2209 iommu: false, 2210 }, 2211 balloon: None, 2212 fs: None, 2213 pmem: None, 2214 serial: ConsoleConfig { 2215 file: None, 2216 mode: ConsoleOutputMode::Null, 2217 iommu: false, 2218 socket: None, 2219 }, 2220 console: ConsoleConfig { 2221 file: None, 2222 mode: ConsoleOutputMode::Tty, 2223 iommu: false, 2224 socket: None, 2225 }, 2226 #[cfg(target_arch = "x86_64")] 2227 debug_console: DebugConsoleConfig::default(), 2228 devices: None, 2229 user_devices: None, 2230 vdpa: None, 2231 vsock: None, 2232 #[cfg(feature = "pvmemcontrol")] 2233 pvmemcontrol: None, 2234 pvpanic: false, 2235 iommu: false, 2236 #[cfg(target_arch = "x86_64")] 2237 sgx_epc: None, 2238 numa: None, 2239 watchdog: false, 2240 #[cfg(feature = "guest_debug")] 2241 gdb: false, 2242 pci_segments: None, 2243 platform: None, 2244 tpm: None, 2245 preserved_fds: None, 2246 landlock_enable: false, 2247 landlock_rules: None, 2248 }) 2249 } 2250 2251 #[test] 2252 fn test_vmm_vm_create() { 2253 let mut vmm = create_dummy_vmm(); 2254 let config = create_dummy_vm_config(); 2255 2256 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2257 assert!(matches!( 2258 vmm.vm_create(config), 2259 Err(VmError::VmAlreadyCreated) 2260 )); 2261 } 2262 2263 #[test] 2264 fn test_vmm_vm_cold_add_device() { 2265 let mut vmm = create_dummy_vmm(); 2266 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2267 2268 assert!(matches!( 2269 vmm.vm_add_device(device_config.clone()), 2270 Err(VmError::VmNotCreated) 2271 )); 2272 2273 let _ = vmm.vm_create(create_dummy_vm_config()); 2274 assert!(vmm 2275 .vm_config 2276 .as_ref() 2277 .unwrap() 2278 .lock() 2279 .unwrap() 2280 .devices 2281 .is_none()); 2282 2283 assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none()); 2284 assert_eq!( 2285 vmm.vm_config 2286 .as_ref() 2287 .unwrap() 2288 .lock() 2289 .unwrap() 2290 .devices 2291 .clone() 2292 .unwrap() 2293 .len(), 2294 1 2295 ); 2296 assert_eq!( 2297 vmm.vm_config 2298 .as_ref() 2299 .unwrap() 2300 .lock() 2301 .unwrap() 2302 .devices 2303 .clone() 2304 .unwrap()[0], 2305 device_config 2306 ); 2307 } 2308 2309 #[test] 2310 fn test_vmm_vm_cold_add_user_device() { 2311 let mut vmm = create_dummy_vmm(); 2312 let user_device_config = 2313 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2314 2315 assert!(matches!( 2316 vmm.vm_add_user_device(user_device_config.clone()), 2317 Err(VmError::VmNotCreated) 2318 )); 2319 2320 let _ = vmm.vm_create(create_dummy_vm_config()); 2321 assert!(vmm 2322 .vm_config 2323 .as_ref() 2324 .unwrap() 2325 .lock() 2326 .unwrap() 2327 .user_devices 2328 .is_none()); 2329 2330 assert!(vmm 2331 .vm_add_user_device(user_device_config.clone()) 2332 .unwrap() 2333 .is_none()); 2334 assert_eq!( 2335 vmm.vm_config 2336 .as_ref() 2337 .unwrap() 2338 .lock() 2339 .unwrap() 2340 .user_devices 2341 .clone() 2342 .unwrap() 2343 .len(), 2344 1 2345 ); 2346 assert_eq!( 2347 vmm.vm_config 2348 .as_ref() 2349 .unwrap() 2350 .lock() 2351 .unwrap() 2352 .user_devices 2353 .clone() 2354 .unwrap()[0], 2355 user_device_config 2356 ); 2357 } 2358 2359 #[test] 2360 fn test_vmm_vm_cold_add_disk() { 2361 let mut vmm = create_dummy_vmm(); 2362 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2363 2364 assert!(matches!( 2365 vmm.vm_add_disk(disk_config.clone()), 2366 Err(VmError::VmNotCreated) 2367 )); 2368 2369 let _ = vmm.vm_create(create_dummy_vm_config()); 2370 assert!(vmm 2371 .vm_config 2372 .as_ref() 2373 .unwrap() 2374 .lock() 2375 .unwrap() 2376 .disks 2377 .is_none()); 2378 2379 assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none()); 2380 assert_eq!( 2381 vmm.vm_config 2382 .as_ref() 2383 .unwrap() 2384 .lock() 2385 .unwrap() 2386 .disks 2387 .clone() 2388 .unwrap() 2389 .len(), 2390 1 2391 ); 2392 assert_eq!( 2393 vmm.vm_config 2394 .as_ref() 2395 .unwrap() 2396 .lock() 2397 .unwrap() 2398 .disks 2399 .clone() 2400 .unwrap()[0], 2401 disk_config 2402 ); 2403 } 2404 2405 #[test] 2406 fn test_vmm_vm_cold_add_fs() { 2407 let mut vmm = create_dummy_vmm(); 2408 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2409 2410 assert!(matches!( 2411 vmm.vm_add_fs(fs_config.clone()), 2412 Err(VmError::VmNotCreated) 2413 )); 2414 2415 let _ = vmm.vm_create(create_dummy_vm_config()); 2416 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2417 2418 assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none()); 2419 assert_eq!( 2420 vmm.vm_config 2421 .as_ref() 2422 .unwrap() 2423 .lock() 2424 .unwrap() 2425 .fs 2426 .clone() 2427 .unwrap() 2428 .len(), 2429 1 2430 ); 2431 assert_eq!( 2432 vmm.vm_config 2433 .as_ref() 2434 .unwrap() 2435 .lock() 2436 .unwrap() 2437 .fs 2438 .clone() 2439 .unwrap()[0], 2440 fs_config 2441 ); 2442 } 2443 2444 #[test] 2445 fn test_vmm_vm_cold_add_pmem() { 2446 let mut vmm = create_dummy_vmm(); 2447 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2448 2449 assert!(matches!( 2450 vmm.vm_add_pmem(pmem_config.clone()), 2451 Err(VmError::VmNotCreated) 2452 )); 2453 2454 let _ = vmm.vm_create(create_dummy_vm_config()); 2455 assert!(vmm 2456 .vm_config 2457 .as_ref() 2458 .unwrap() 2459 .lock() 2460 .unwrap() 2461 .pmem 2462 .is_none()); 2463 2464 assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none()); 2465 assert_eq!( 2466 vmm.vm_config 2467 .as_ref() 2468 .unwrap() 2469 .lock() 2470 .unwrap() 2471 .pmem 2472 .clone() 2473 .unwrap() 2474 .len(), 2475 1 2476 ); 2477 assert_eq!( 2478 vmm.vm_config 2479 .as_ref() 2480 .unwrap() 2481 .lock() 2482 .unwrap() 2483 .pmem 2484 .clone() 2485 .unwrap()[0], 2486 pmem_config 2487 ); 2488 } 2489 2490 #[test] 2491 fn test_vmm_vm_cold_add_net() { 2492 let mut vmm = create_dummy_vmm(); 2493 let net_config = NetConfig::parse( 2494 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2495 ) 2496 .unwrap(); 2497 2498 assert!(matches!( 2499 vmm.vm_add_net(net_config.clone()), 2500 Err(VmError::VmNotCreated) 2501 )); 2502 2503 let _ = vmm.vm_create(create_dummy_vm_config()); 2504 assert!(vmm 2505 .vm_config 2506 .as_ref() 2507 .unwrap() 2508 .lock() 2509 .unwrap() 2510 .net 2511 .is_none()); 2512 2513 assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none()); 2514 assert_eq!( 2515 vmm.vm_config 2516 .as_ref() 2517 .unwrap() 2518 .lock() 2519 .unwrap() 2520 .net 2521 .clone() 2522 .unwrap() 2523 .len(), 2524 1 2525 ); 2526 assert_eq!( 2527 vmm.vm_config 2528 .as_ref() 2529 .unwrap() 2530 .lock() 2531 .unwrap() 2532 .net 2533 .clone() 2534 .unwrap()[0], 2535 net_config 2536 ); 2537 } 2538 2539 #[test] 2540 fn test_vmm_vm_cold_add_vdpa() { 2541 let mut vmm = create_dummy_vmm(); 2542 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2543 2544 assert!(matches!( 2545 vmm.vm_add_vdpa(vdpa_config.clone()), 2546 Err(VmError::VmNotCreated) 2547 )); 2548 2549 let _ = vmm.vm_create(create_dummy_vm_config()); 2550 assert!(vmm 2551 .vm_config 2552 .as_ref() 2553 .unwrap() 2554 .lock() 2555 .unwrap() 2556 .vdpa 2557 .is_none()); 2558 2559 assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none()); 2560 assert_eq!( 2561 vmm.vm_config 2562 .as_ref() 2563 .unwrap() 2564 .lock() 2565 .unwrap() 2566 .vdpa 2567 .clone() 2568 .unwrap() 2569 .len(), 2570 1 2571 ); 2572 assert_eq!( 2573 vmm.vm_config 2574 .as_ref() 2575 .unwrap() 2576 .lock() 2577 .unwrap() 2578 .vdpa 2579 .clone() 2580 .unwrap()[0], 2581 vdpa_config 2582 ); 2583 } 2584 2585 #[test] 2586 fn test_vmm_vm_cold_add_vsock() { 2587 let mut vmm = create_dummy_vmm(); 2588 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2589 2590 assert!(matches!( 2591 vmm.vm_add_vsock(vsock_config.clone()), 2592 Err(VmError::VmNotCreated) 2593 )); 2594 2595 let _ = vmm.vm_create(create_dummy_vm_config()); 2596 assert!(vmm 2597 .vm_config 2598 .as_ref() 2599 .unwrap() 2600 .lock() 2601 .unwrap() 2602 .vsock 2603 .is_none()); 2604 2605 assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none()); 2606 assert_eq!( 2607 vmm.vm_config 2608 .as_ref() 2609 .unwrap() 2610 .lock() 2611 .unwrap() 2612 .vsock 2613 .clone() 2614 .unwrap(), 2615 vsock_config 2616 ); 2617 } 2618 } 2619