1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use std::collections::HashMap; 12 use std::fs::File; 13 use std::io::{stdout, Read, Write}; 14 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 15 use std::os::unix::net::{UnixListener, UnixStream}; 16 use std::panic::AssertUnwindSafe; 17 use std::path::PathBuf; 18 use std::rc::Rc; 19 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 use std::{io, result, thread}; 23 24 use anyhow::anyhow; 25 #[cfg(feature = "dbus_api")] 26 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 27 use api::http::HttpApiHandle; 28 use console_devices::{pre_create_console_devices, ConsoleInfo}; 29 use landlock::LandlockError; 30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 31 use memory_manager::MemoryManagerSnapshotData; 32 use pci::PciBdf; 33 use seccompiler::{apply_filter, SeccompAction}; 34 use serde::ser::{SerializeStruct, Serializer}; 35 use serde::{Deserialize, Serialize}; 36 use signal_hook::iterator::{Handle, Signals}; 37 use thiserror::Error; 38 use tracer::trace_scoped; 39 use vm_memory::bitmap::AtomicBitmap; 40 use vm_memory::{ReadVolatile, WriteVolatile}; 41 use vm_migration::protocol::*; 42 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 43 use vmm_sys_util::eventfd::EventFd; 44 use vmm_sys_util::signal::unblock_signal; 45 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 46 47 use crate::api::{ 48 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 49 VmSendMigrationData, VmmPingResponse, 50 }; 51 use crate::config::{ 52 add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, 53 UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, 54 }; 55 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 56 use crate::coredump::GuestDebuggable; 57 use crate::landlock::Landlock; 58 use crate::memory_manager::MemoryManager; 59 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 60 use crate::migration::get_vm_snapshot; 61 use crate::migration::{recv_vm_config, recv_vm_state}; 62 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 63 use crate::vm::{Error as VmError, Vm, VmState}; 64 65 mod acpi; 66 pub mod api; 67 mod clone3; 68 pub mod config; 69 pub mod console_devices; 70 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 71 mod coredump; 72 pub mod cpu; 73 pub mod device_manager; 74 pub mod device_tree; 75 #[cfg(feature = "guest_debug")] 76 mod gdb; 77 #[cfg(feature = "igvm")] 78 mod igvm; 79 pub mod interrupt; 80 pub mod landlock; 81 pub mod memory_manager; 82 pub mod migration; 83 mod pci_segment; 84 pub mod seccomp_filters; 85 mod serial_manager; 86 mod sigwinch_listener; 87 pub mod vm; 88 pub mod vm_config; 89 90 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 91 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 92 93 /// Errors associated with VMM management 94 #[derive(Debug, Error)] 95 pub enum Error { 96 /// API request receive error 97 #[error("Error receiving API request: {0}")] 98 ApiRequestRecv(#[source] RecvError), 99 100 /// API response send error 101 #[error("Error sending API request: {0}")] 102 ApiResponseSend(#[source] SendError<ApiResponse>), 103 104 /// Cannot bind to the UNIX domain socket path 105 #[error("Error binding to UNIX domain socket: {0}")] 106 Bind(#[source] io::Error), 107 108 /// Cannot clone EventFd. 109 #[error("Error cloning EventFd: {0}")] 110 EventFdClone(#[source] io::Error), 111 112 /// Cannot create EventFd. 113 #[error("Error creating EventFd: {0}")] 114 EventFdCreate(#[source] io::Error), 115 116 /// Cannot read from EventFd. 117 #[error("Error reading from EventFd: {0}")] 118 EventFdRead(#[source] io::Error), 119 120 /// Cannot create epoll context. 121 #[error("Error creating epoll context: {0}")] 122 Epoll(#[source] io::Error), 123 124 /// Cannot create HTTP thread 125 #[error("Error spawning HTTP thread: {0}")] 126 HttpThreadSpawn(#[source] io::Error), 127 128 /// Cannot create D-Bus thread 129 #[cfg(feature = "dbus_api")] 130 #[error("Error spawning D-Bus thread: {0}")] 131 DBusThreadSpawn(#[source] io::Error), 132 133 /// Cannot start D-Bus session 134 #[cfg(feature = "dbus_api")] 135 #[error("Error starting D-Bus session: {0}")] 136 CreateDBusSession(#[source] zbus::Error), 137 138 /// Cannot create `event-monitor` thread 139 #[error("Error spawning `event-monitor` thread: {0}")] 140 EventMonitorThreadSpawn(#[source] io::Error), 141 142 /// Cannot handle the VM STDIN stream 143 #[error("Error handling VM stdin: {0:?}")] 144 Stdin(VmError), 145 146 /// Cannot handle the VM pty stream 147 #[error("Error handling VM pty: {0:?}")] 148 Pty(VmError), 149 150 /// Cannot reboot the VM 151 #[error("Error rebooting VM: {0:?}")] 152 VmReboot(VmError), 153 154 /// Cannot create VMM thread 155 #[error("Error spawning VMM thread {0:?}")] 156 VmmThreadSpawn(#[source] io::Error), 157 158 /// Cannot shut the VMM down 159 #[error("Error shutting down VMM: {0:?}")] 160 VmmShutdown(VmError), 161 162 /// Cannot create seccomp filter 163 #[error("Error creating seccomp filter: {0}")] 164 CreateSeccompFilter(seccompiler::Error), 165 166 /// Cannot apply seccomp filter 167 #[error("Error applying seccomp filter: {0}")] 168 ApplySeccompFilter(seccompiler::Error), 169 170 /// Error activating virtio devices 171 #[error("Error activating virtio devices: {0:?}")] 172 ActivateVirtioDevices(VmError), 173 174 /// Error creating API server 175 #[error("Error creating API server {0:?}")] 176 CreateApiServer(micro_http::ServerError), 177 178 /// Error binding API server socket 179 #[error("Error creation API server's socket {0:?}")] 180 CreateApiServerSocket(#[source] io::Error), 181 182 #[cfg(feature = "guest_debug")] 183 #[error("Failed to start the GDB thread: {0}")] 184 GdbThreadSpawn(io::Error), 185 186 /// GDB request receive error 187 #[cfg(feature = "guest_debug")] 188 #[error("Error receiving GDB request: {0}")] 189 GdbRequestRecv(#[source] RecvError), 190 191 /// GDB response send error 192 #[cfg(feature = "guest_debug")] 193 #[error("Error sending GDB request: {0}")] 194 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 195 196 #[error("Cannot spawn a signal handler thread: {0}")] 197 SignalHandlerSpawn(#[source] io::Error), 198 199 #[error("Failed to join on threads: {0:?}")] 200 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 201 202 /// Cannot create Landlock object 203 #[error("Error creating landlock object: {0}")] 204 CreateLandlock(LandlockError), 205 206 /// Cannot apply landlock based sandboxing 207 #[error("Error applying landlock: {0}")] 208 ApplyLandlock(LandlockError), 209 } 210 pub type Result<T> = result::Result<T, Error>; 211 212 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 213 #[repr(u64)] 214 pub enum EpollDispatch { 215 Exit = 0, 216 Reset = 1, 217 Api = 2, 218 ActivateVirtioDevices = 3, 219 Debug = 4, 220 Unknown, 221 } 222 223 impl From<u64> for EpollDispatch { 224 fn from(v: u64) -> Self { 225 use EpollDispatch::*; 226 match v { 227 0 => Exit, 228 1 => Reset, 229 2 => Api, 230 3 => ActivateVirtioDevices, 231 4 => Debug, 232 _ => Unknown, 233 } 234 } 235 } 236 237 pub struct EpollContext { 238 epoll_file: File, 239 } 240 241 impl EpollContext { 242 pub fn new() -> result::Result<EpollContext, io::Error> { 243 let epoll_fd = epoll::create(true)?; 244 // Use 'File' to enforce closing on 'epoll_fd' 245 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 246 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 247 248 Ok(EpollContext { epoll_file }) 249 } 250 251 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 252 where 253 T: AsRawFd, 254 { 255 let dispatch_index = token as u64; 256 epoll::ctl( 257 self.epoll_file.as_raw_fd(), 258 epoll::ControlOptions::EPOLL_CTL_ADD, 259 fd.as_raw_fd(), 260 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 261 )?; 262 263 Ok(()) 264 } 265 266 #[cfg(fuzzing)] 267 pub fn add_event_custom<T>( 268 &mut self, 269 fd: &T, 270 id: u64, 271 evts: epoll::Events, 272 ) -> result::Result<(), io::Error> 273 where 274 T: AsRawFd, 275 { 276 epoll::ctl( 277 self.epoll_file.as_raw_fd(), 278 epoll::ControlOptions::EPOLL_CTL_ADD, 279 fd.as_raw_fd(), 280 epoll::Event::new(evts, id), 281 )?; 282 283 Ok(()) 284 } 285 } 286 287 impl AsRawFd for EpollContext { 288 fn as_raw_fd(&self) -> RawFd { 289 self.epoll_file.as_raw_fd() 290 } 291 } 292 293 pub struct PciDeviceInfo { 294 pub id: String, 295 pub bdf: PciBdf, 296 } 297 298 impl Serialize for PciDeviceInfo { 299 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 300 where 301 S: Serializer, 302 { 303 let bdf_str = self.bdf.to_string(); 304 305 // Serialize the structure. 306 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 307 state.serialize_field("id", &self.id)?; 308 state.serialize_field("bdf", &bdf_str)?; 309 state.end() 310 } 311 } 312 313 pub fn feature_list() -> Vec<String> { 314 vec![ 315 #[cfg(feature = "dbus_api")] 316 "dbus_api".to_string(), 317 #[cfg(feature = "dhat-heap")] 318 "dhat-heap".to_string(), 319 #[cfg(feature = "guest_debug")] 320 "guest_debug".to_string(), 321 #[cfg(feature = "igvm")] 322 "igvm".to_string(), 323 #[cfg(feature = "io_uring")] 324 "io_uring".to_string(), 325 #[cfg(feature = "kvm")] 326 "kvm".to_string(), 327 #[cfg(feature = "mshv")] 328 "mshv".to_string(), 329 #[cfg(feature = "sev_snp")] 330 "sev_snp".to_string(), 331 #[cfg(feature = "tdx")] 332 "tdx".to_string(), 333 #[cfg(feature = "tracing")] 334 "tracing".to_string(), 335 ] 336 } 337 338 pub fn start_event_monitor_thread( 339 mut monitor: event_monitor::Monitor, 340 seccomp_action: &SeccompAction, 341 landlock_enable: bool, 342 hypervisor_type: hypervisor::HypervisorType, 343 exit_event: EventFd, 344 ) -> Result<thread::JoinHandle<Result<()>>> { 345 // Retrieve seccomp filter 346 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 347 .map_err(Error::CreateSeccompFilter)?; 348 349 thread::Builder::new() 350 .name("event-monitor".to_owned()) 351 .spawn(move || { 352 // Apply seccomp filter 353 if !seccomp_filter.is_empty() { 354 apply_filter(&seccomp_filter) 355 .map_err(Error::ApplySeccompFilter) 356 .map_err(|e| { 357 error!("Error applying seccomp filter: {:?}", e); 358 exit_event.write(1).ok(); 359 e 360 })?; 361 } 362 if landlock_enable { 363 Landlock::new() 364 .map_err(Error::CreateLandlock)? 365 .restrict_self() 366 .map_err(Error::ApplyLandlock) 367 .map_err(|e| { 368 error!("Error applying landlock to event monitor thread: {:?}", e); 369 exit_event.write(1).ok(); 370 e 371 })?; 372 } 373 374 std::panic::catch_unwind(AssertUnwindSafe(move || { 375 while let Ok(event) = monitor.rx.recv() { 376 let event = Arc::new(event); 377 378 if let Some(ref mut file) = monitor.file { 379 file.write_all(event.as_bytes().as_ref()).ok(); 380 file.write_all(b"\n\n").ok(); 381 } 382 383 for tx in monitor.broadcast.iter() { 384 tx.send(event.clone()).ok(); 385 } 386 } 387 })) 388 .map_err(|_| { 389 error!("`event-monitor` thread panicked"); 390 exit_event.write(1).ok(); 391 }) 392 .ok(); 393 394 Ok(()) 395 }) 396 .map_err(Error::EventMonitorThreadSpawn) 397 } 398 399 #[allow(unused_variables)] 400 #[allow(clippy::too_many_arguments)] 401 pub fn start_vmm_thread( 402 vmm_version: VmmVersionInfo, 403 http_path: &Option<String>, 404 http_fd: Option<RawFd>, 405 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 406 api_event: EventFd, 407 api_sender: Sender<ApiRequest>, 408 api_receiver: Receiver<ApiRequest>, 409 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 410 #[cfg(feature = "guest_debug")] debug_event: EventFd, 411 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 412 exit_event: EventFd, 413 seccomp_action: &SeccompAction, 414 hypervisor: Arc<dyn hypervisor::Hypervisor>, 415 landlock_enable: bool, 416 ) -> Result<VmmThreadHandle> { 417 #[cfg(feature = "guest_debug")] 418 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 419 #[cfg(feature = "guest_debug")] 420 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 421 #[cfg(feature = "guest_debug")] 422 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 423 #[cfg(feature = "guest_debug")] 424 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 425 426 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 427 let hypervisor_type = hypervisor.hypervisor_type(); 428 429 // Retrieve seccomp filter 430 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 431 .map_err(Error::CreateSeccompFilter)?; 432 433 let vmm_seccomp_action = seccomp_action.clone(); 434 let thread = { 435 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 436 thread::Builder::new() 437 .name("vmm".to_string()) 438 .spawn(move || { 439 // Apply seccomp filter for VMM thread. 440 if !vmm_seccomp_filter.is_empty() { 441 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 442 } 443 444 let mut vmm = Vmm::new( 445 vmm_version, 446 api_event, 447 #[cfg(feature = "guest_debug")] 448 debug_event, 449 #[cfg(feature = "guest_debug")] 450 vm_debug_event, 451 vmm_seccomp_action, 452 hypervisor, 453 exit_event, 454 )?; 455 456 vmm.setup_signal_handler(landlock_enable)?; 457 458 vmm.control_loop( 459 Rc::new(api_receiver), 460 #[cfg(feature = "guest_debug")] 461 Rc::new(gdb_receiver), 462 ) 463 }) 464 .map_err(Error::VmmThreadSpawn)? 465 }; 466 467 // The VMM thread is started, we can start the dbus thread 468 // and start serving HTTP requests 469 #[cfg(feature = "dbus_api")] 470 let dbus_shutdown_chs = match dbus_options { 471 Some(opts) => { 472 let (_, chs) = api::start_dbus_thread( 473 opts, 474 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 475 api_sender.clone(), 476 seccomp_action, 477 exit_event.try_clone().map_err(Error::EventFdClone)?, 478 hypervisor_type, 479 )?; 480 Some(chs) 481 } 482 None => None, 483 }; 484 485 let http_api_handle = if let Some(http_path) = http_path { 486 Some(api::start_http_path_thread( 487 http_path, 488 api_event_clone, 489 api_sender, 490 seccomp_action, 491 exit_event, 492 hypervisor_type, 493 landlock_enable, 494 )?) 495 } else if let Some(http_fd) = http_fd { 496 Some(api::start_http_fd_thread( 497 http_fd, 498 api_event_clone, 499 api_sender, 500 seccomp_action, 501 exit_event, 502 hypervisor_type, 503 landlock_enable, 504 )?) 505 } else { 506 None 507 }; 508 509 #[cfg(feature = "guest_debug")] 510 if let Some(debug_path) = debug_path { 511 let target = gdb::GdbStub::new( 512 gdb_sender, 513 gdb_debug_event, 514 gdb_vm_debug_event, 515 gdb_hw_breakpoints, 516 ); 517 thread::Builder::new() 518 .name("gdb".to_owned()) 519 .spawn(move || gdb::gdb_thread(target, &debug_path)) 520 .map_err(Error::GdbThreadSpawn)?; 521 } 522 523 Ok(VmmThreadHandle { 524 thread_handle: thread, 525 #[cfg(feature = "dbus_api")] 526 dbus_shutdown_chs, 527 http_api_handle, 528 }) 529 } 530 531 #[derive(Clone, Deserialize, Serialize)] 532 struct VmMigrationConfig { 533 vm_config: Arc<Mutex<VmConfig>>, 534 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 535 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 536 memory_manager_data: MemoryManagerSnapshotData, 537 } 538 539 #[derive(Debug, Clone)] 540 pub struct VmmVersionInfo { 541 pub build_version: String, 542 pub version: String, 543 } 544 545 impl VmmVersionInfo { 546 pub fn new(build_version: &str, version: &str) -> Self { 547 Self { 548 build_version: build_version.to_owned(), 549 version: version.to_owned(), 550 } 551 } 552 } 553 554 pub struct VmmThreadHandle { 555 pub thread_handle: thread::JoinHandle<Result<()>>, 556 #[cfg(feature = "dbus_api")] 557 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 558 pub http_api_handle: Option<HttpApiHandle>, 559 } 560 561 pub struct Vmm { 562 epoll: EpollContext, 563 exit_evt: EventFd, 564 reset_evt: EventFd, 565 api_evt: EventFd, 566 #[cfg(feature = "guest_debug")] 567 debug_evt: EventFd, 568 #[cfg(feature = "guest_debug")] 569 vm_debug_evt: EventFd, 570 version: VmmVersionInfo, 571 vm: Option<Vm>, 572 vm_config: Option<Arc<Mutex<VmConfig>>>, 573 seccomp_action: SeccompAction, 574 hypervisor: Arc<dyn hypervisor::Hypervisor>, 575 activate_evt: EventFd, 576 signals: Option<Handle>, 577 threads: Vec<thread::JoinHandle<()>>, 578 original_termios_opt: Arc<Mutex<Option<termios>>>, 579 console_resize_pipe: Option<Arc<File>>, 580 console_info: Option<ConsoleInfo>, 581 } 582 583 impl Vmm { 584 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 585 586 fn signal_handler( 587 mut signals: Signals, 588 original_termios_opt: Arc<Mutex<Option<termios>>>, 589 exit_evt: &EventFd, 590 ) { 591 for sig in &Self::HANDLED_SIGNALS { 592 unblock_signal(*sig).unwrap(); 593 } 594 595 for signal in signals.forever() { 596 match signal { 597 SIGTERM | SIGINT => { 598 if exit_evt.write(1).is_err() { 599 // Resetting the terminal is usually done as the VMM exits 600 if let Ok(lock) = original_termios_opt.lock() { 601 if let Some(termios) = *lock { 602 // SAFETY: FFI call 603 let _ = unsafe { 604 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 605 }; 606 } 607 } else { 608 warn!("Failed to lock original termios"); 609 } 610 611 std::process::exit(1); 612 } 613 } 614 _ => (), 615 } 616 } 617 } 618 619 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 620 let signals = Signals::new(Self::HANDLED_SIGNALS); 621 match signals { 622 Ok(signals) => { 623 self.signals = Some(signals.handle()); 624 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 625 let original_termios_opt = Arc::clone(&self.original_termios_opt); 626 627 let signal_handler_seccomp_filter = get_seccomp_filter( 628 &self.seccomp_action, 629 Thread::SignalHandler, 630 self.hypervisor.hypervisor_type(), 631 ) 632 .map_err(Error::CreateSeccompFilter)?; 633 self.threads.push( 634 thread::Builder::new() 635 .name("vmm_signal_handler".to_string()) 636 .spawn(move || { 637 if !signal_handler_seccomp_filter.is_empty() { 638 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 639 .map_err(Error::ApplySeccompFilter) 640 { 641 error!("Error applying seccomp filter: {:?}", e); 642 exit_evt.write(1).ok(); 643 return; 644 } 645 } 646 if landlock_enable{ 647 match Landlock::new() { 648 Ok(landlock) => { 649 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 650 error!("Error applying Landlock to signal handler thread: {:?}", e); 651 exit_evt.write(1).ok(); 652 }); 653 } 654 Err(e) => { 655 error!("Error creating Landlock object: {:?}", e); 656 exit_evt.write(1).ok(); 657 } 658 }; 659 } 660 661 std::panic::catch_unwind(AssertUnwindSafe(|| { 662 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 663 })) 664 .map_err(|_| { 665 error!("vmm signal_handler thread panicked"); 666 exit_evt.write(1).ok() 667 }) 668 .ok(); 669 }) 670 .map_err(Error::SignalHandlerSpawn)?, 671 ); 672 } 673 Err(e) => error!("Signal not found {}", e), 674 } 675 Ok(()) 676 } 677 678 #[allow(clippy::too_many_arguments)] 679 fn new( 680 vmm_version: VmmVersionInfo, 681 api_evt: EventFd, 682 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 683 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 684 seccomp_action: SeccompAction, 685 hypervisor: Arc<dyn hypervisor::Hypervisor>, 686 exit_evt: EventFd, 687 ) -> Result<Self> { 688 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 689 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 690 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 691 692 epoll 693 .add_event(&exit_evt, EpollDispatch::Exit) 694 .map_err(Error::Epoll)?; 695 696 epoll 697 .add_event(&reset_evt, EpollDispatch::Reset) 698 .map_err(Error::Epoll)?; 699 700 epoll 701 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 702 .map_err(Error::Epoll)?; 703 704 epoll 705 .add_event(&api_evt, EpollDispatch::Api) 706 .map_err(Error::Epoll)?; 707 708 #[cfg(feature = "guest_debug")] 709 epoll 710 .add_event(&debug_evt, EpollDispatch::Debug) 711 .map_err(Error::Epoll)?; 712 713 Ok(Vmm { 714 epoll, 715 exit_evt, 716 reset_evt, 717 api_evt, 718 #[cfg(feature = "guest_debug")] 719 debug_evt, 720 #[cfg(feature = "guest_debug")] 721 vm_debug_evt, 722 version: vmm_version, 723 vm: None, 724 vm_config: None, 725 seccomp_action, 726 hypervisor, 727 activate_evt, 728 signals: None, 729 threads: vec![], 730 original_termios_opt: Arc::new(Mutex::new(None)), 731 console_resize_pipe: None, 732 console_info: None, 733 }) 734 } 735 736 fn vm_receive_config<T>( 737 &mut self, 738 req: &Request, 739 socket: &mut T, 740 existing_memory_files: Option<HashMap<u32, File>>, 741 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 742 where 743 T: Read + Write, 744 { 745 // Read in config data along with memory manager data 746 let mut data: Vec<u8> = Vec::new(); 747 data.resize_with(req.length() as usize, Default::default); 748 socket 749 .read_exact(&mut data) 750 .map_err(MigratableError::MigrateSocket)?; 751 752 let vm_migration_config: VmMigrationConfig = 753 serde_json::from_slice(&data).map_err(|e| { 754 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 755 })?; 756 757 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 758 self.vm_check_cpuid_compatibility( 759 &vm_migration_config.vm_config, 760 &vm_migration_config.common_cpuid, 761 )?; 762 763 let config = vm_migration_config.vm_config.clone(); 764 self.vm_config = Some(vm_migration_config.vm_config); 765 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 766 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 767 })?); 768 769 if self 770 .vm_config 771 .as_ref() 772 .unwrap() 773 .lock() 774 .unwrap() 775 .landlock_enable 776 { 777 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 778 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 779 })?; 780 } 781 782 let vm = Vm::create_hypervisor_vm( 783 &self.hypervisor, 784 #[cfg(feature = "tdx")] 785 false, 786 #[cfg(feature = "sev_snp")] 787 false, 788 ) 789 .map_err(|e| { 790 MigratableError::MigrateReceive(anyhow!( 791 "Error creating hypervisor VM from snapshot: {:?}", 792 e 793 )) 794 })?; 795 796 let phys_bits = 797 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 798 799 let memory_manager = MemoryManager::new( 800 vm, 801 &config.lock().unwrap().memory.clone(), 802 None, 803 phys_bits, 804 #[cfg(feature = "tdx")] 805 false, 806 Some(&vm_migration_config.memory_manager_data), 807 existing_memory_files, 808 #[cfg(target_arch = "x86_64")] 809 None, 810 ) 811 .map_err(|e| { 812 MigratableError::MigrateReceive(anyhow!( 813 "Error creating MemoryManager from snapshot: {:?}", 814 e 815 )) 816 })?; 817 818 Response::ok().write_to(socket)?; 819 820 Ok(memory_manager) 821 } 822 823 fn vm_receive_state<T>( 824 &mut self, 825 req: &Request, 826 socket: &mut T, 827 mm: Arc<Mutex<MemoryManager>>, 828 ) -> std::result::Result<(), MigratableError> 829 where 830 T: Read + Write, 831 { 832 // Read in state data 833 let mut data: Vec<u8> = Vec::new(); 834 data.resize_with(req.length() as usize, Default::default); 835 socket 836 .read_exact(&mut data) 837 .map_err(MigratableError::MigrateSocket)?; 838 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 839 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 840 })?; 841 842 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 843 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 844 })?; 845 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 846 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 847 })?; 848 #[cfg(feature = "guest_debug")] 849 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 850 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 851 })?; 852 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 853 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 854 })?; 855 856 let timestamp = Instant::now(); 857 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 858 let mut vm = Vm::new_from_memory_manager( 859 self.vm_config.clone().unwrap(), 860 mm, 861 hypervisor_vm, 862 exit_evt, 863 reset_evt, 864 #[cfg(feature = "guest_debug")] 865 debug_evt, 866 &self.seccomp_action, 867 self.hypervisor.clone(), 868 activate_evt, 869 timestamp, 870 self.console_info.clone(), 871 self.console_resize_pipe.as_ref().map(Arc::clone), 872 Arc::clone(&self.original_termios_opt), 873 Some(snapshot), 874 ) 875 .map_err(|e| { 876 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 877 })?; 878 879 // Create VM 880 vm.restore().map_err(|e| { 881 Response::error().write_to(socket).ok(); 882 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 883 })?; 884 self.vm = Some(vm); 885 886 Response::ok().write_to(socket)?; 887 888 Ok(()) 889 } 890 891 fn vm_receive_memory<T>( 892 &mut self, 893 req: &Request, 894 socket: &mut T, 895 memory_manager: &mut MemoryManager, 896 ) -> std::result::Result<(), MigratableError> 897 where 898 T: Read + ReadVolatile + Write, 899 { 900 // Read table 901 let table = MemoryRangeTable::read_from(socket, req.length())?; 902 903 // And then read the memory itself 904 memory_manager 905 .receive_memory_regions(&table, socket) 906 .inspect_err(|_| { 907 Response::error().write_to(socket).ok(); 908 })?; 909 Response::ok().write_to(socket)?; 910 Ok(()) 911 } 912 913 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 914 url.strip_prefix("unix:") 915 .ok_or_else(|| { 916 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 917 }) 918 .map(|s| s.into()) 919 } 920 921 // Returns true if there were dirty pages to send 922 fn vm_maybe_send_dirty_pages<T>( 923 vm: &mut Vm, 924 socket: &mut T, 925 ) -> result::Result<bool, MigratableError> 926 where 927 T: Read + Write + WriteVolatile, 928 { 929 // Send (dirty) memory table 930 let table = vm.dirty_log()?; 931 932 // But if there are no regions go straight to pause 933 if table.regions().is_empty() { 934 return Ok(false); 935 } 936 937 Request::memory(table.length()).write_to(socket).unwrap(); 938 table.write_to(socket)?; 939 // And then the memory itself 940 vm.send_memory_regions(&table, socket)?; 941 Response::read_from(socket)?.ok_or_abandon( 942 socket, 943 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 944 )?; 945 946 Ok(true) 947 } 948 949 fn send_migration( 950 vm: &mut Vm, 951 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 952 dyn hypervisor::Hypervisor, 953 >, 954 send_data_migration: VmSendMigrationData, 955 ) -> result::Result<(), MigratableError> { 956 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 957 let mut socket = UnixStream::connect(path).map_err(|e| { 958 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 959 })?; 960 961 // Start the migration 962 Request::start().write_to(&mut socket)?; 963 Response::read_from(&mut socket)?.ok_or_abandon( 964 &mut socket, 965 MigratableError::MigrateSend(anyhow!("Error starting migration")), 966 )?; 967 968 // Send config 969 let vm_config = vm.get_config(); 970 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 971 let common_cpuid = { 972 #[cfg(feature = "tdx")] 973 if vm_config.lock().unwrap().is_tdx_enabled() { 974 return Err(MigratableError::MigrateSend(anyhow!( 975 "Live Migration is not supported when TDX is enabled" 976 ))); 977 }; 978 979 let amx = vm_config.lock().unwrap().cpus.features.amx; 980 let phys_bits = 981 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 982 arch::generate_common_cpuid( 983 &hypervisor, 984 &arch::CpuidConfig { 985 sgx_epc_sections: None, 986 phys_bits, 987 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 988 #[cfg(feature = "tdx")] 989 tdx: false, 990 amx, 991 }, 992 ) 993 .map_err(|e| { 994 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 995 })? 996 }; 997 998 if send_data_migration.local { 999 vm.send_memory_fds(&mut socket)?; 1000 } 1001 1002 let vm_migration_config = VmMigrationConfig { 1003 vm_config, 1004 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1005 common_cpuid, 1006 memory_manager_data: vm.memory_manager_data(), 1007 }; 1008 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1009 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1010 socket 1011 .write_all(&config_data) 1012 .map_err(MigratableError::MigrateSocket)?; 1013 Response::read_from(&mut socket)?.ok_or_abandon( 1014 &mut socket, 1015 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1016 )?; 1017 1018 // Let every Migratable object know about the migration being started. 1019 vm.start_migration()?; 1020 1021 if send_data_migration.local { 1022 // Now pause VM 1023 vm.pause()?; 1024 } else { 1025 // Start logging dirty pages 1026 vm.start_dirty_log()?; 1027 1028 // Send memory table 1029 let table = vm.memory_range_table()?; 1030 Request::memory(table.length()) 1031 .write_to(&mut socket) 1032 .unwrap(); 1033 table.write_to(&mut socket)?; 1034 // And then the memory itself 1035 vm.send_memory_regions(&table, &mut socket)?; 1036 Response::read_from(&mut socket)?.ok_or_abandon( 1037 &mut socket, 1038 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1039 )?; 1040 1041 // Try at most 5 passes of dirty memory sending 1042 const MAX_DIRTY_MIGRATIONS: usize = 5; 1043 for i in 0..MAX_DIRTY_MIGRATIONS { 1044 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1045 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1046 break; 1047 } 1048 } 1049 1050 // Now pause VM 1051 vm.pause()?; 1052 1053 // Send last batch of dirty pages 1054 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1055 1056 // Stop logging dirty pages 1057 vm.stop_dirty_log()?; 1058 } 1059 // Capture snapshot and send it 1060 let vm_snapshot = vm.snapshot()?; 1061 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1062 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1063 socket 1064 .write_all(&snapshot_data) 1065 .map_err(MigratableError::MigrateSocket)?; 1066 Response::read_from(&mut socket)?.ok_or_abandon( 1067 &mut socket, 1068 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1069 )?; 1070 // Complete the migration 1071 Request::complete().write_to(&mut socket)?; 1072 Response::read_from(&mut socket)?.ok_or_abandon( 1073 &mut socket, 1074 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1075 )?; 1076 1077 info!("Migration complete"); 1078 1079 // Let every Migratable object know about the migration being complete 1080 vm.complete_migration() 1081 } 1082 1083 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1084 fn vm_check_cpuid_compatibility( 1085 &self, 1086 src_vm_config: &Arc<Mutex<VmConfig>>, 1087 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1088 ) -> result::Result<(), MigratableError> { 1089 #[cfg(feature = "tdx")] 1090 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1091 return Err(MigratableError::MigrateReceive(anyhow!( 1092 "Live Migration is not supported when TDX is enabled" 1093 ))); 1094 }; 1095 1096 // We check the `CPUID` compatibility of between the source vm and destination, which is 1097 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1098 let dest_cpuid = &{ 1099 let vm_config = &src_vm_config.lock().unwrap(); 1100 1101 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1102 arch::generate_common_cpuid( 1103 &self.hypervisor.clone(), 1104 &arch::CpuidConfig { 1105 sgx_epc_sections: None, 1106 phys_bits, 1107 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1108 #[cfg(feature = "tdx")] 1109 tdx: false, 1110 amx: vm_config.cpus.features.amx, 1111 }, 1112 ) 1113 .map_err(|e| { 1114 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1115 })? 1116 }; 1117 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1118 MigratableError::MigrateReceive(anyhow!( 1119 "Error checking cpu feature compatibility': {:?}", 1120 e 1121 )) 1122 }) 1123 } 1124 1125 fn control_loop( 1126 &mut self, 1127 api_receiver: Rc<Receiver<ApiRequest>>, 1128 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1129 ) -> Result<()> { 1130 const EPOLL_EVENTS_LEN: usize = 100; 1131 1132 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1133 let epoll_fd = self.epoll.as_raw_fd(); 1134 1135 'outer: loop { 1136 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1137 Ok(res) => res, 1138 Err(e) => { 1139 if e.kind() == io::ErrorKind::Interrupted { 1140 // It's well defined from the epoll_wait() syscall 1141 // documentation that the epoll loop can be interrupted 1142 // before any of the requested events occurred or the 1143 // timeout expired. In both those cases, epoll_wait() 1144 // returns an error of type EINTR, but this should not 1145 // be considered as a regular error. Instead it is more 1146 // appropriate to retry, by calling into epoll_wait(). 1147 continue; 1148 } 1149 return Err(Error::Epoll(e)); 1150 } 1151 }; 1152 1153 for event in events.iter().take(num_events) { 1154 let dispatch_event: EpollDispatch = event.data.into(); 1155 match dispatch_event { 1156 EpollDispatch::Unknown => { 1157 let event = event.data; 1158 warn!("Unknown VMM loop event: {}", event); 1159 } 1160 EpollDispatch::Exit => { 1161 info!("VM exit event"); 1162 // Consume the event. 1163 self.exit_evt.read().map_err(Error::EventFdRead)?; 1164 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1165 1166 break 'outer; 1167 } 1168 EpollDispatch::Reset => { 1169 info!("VM reset event"); 1170 // Consume the event. 1171 self.reset_evt.read().map_err(Error::EventFdRead)?; 1172 self.vm_reboot().map_err(Error::VmReboot)?; 1173 } 1174 EpollDispatch::ActivateVirtioDevices => { 1175 if let Some(ref vm) = self.vm { 1176 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1177 info!( 1178 "Trying to activate pending virtio devices: count = {}", 1179 count 1180 ); 1181 vm.activate_virtio_devices() 1182 .map_err(Error::ActivateVirtioDevices)?; 1183 } 1184 } 1185 EpollDispatch::Api => { 1186 // Consume the events. 1187 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1188 // Read from the API receiver channel 1189 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1190 1191 if api_request(self)? { 1192 break 'outer; 1193 } 1194 } 1195 } 1196 #[cfg(feature = "guest_debug")] 1197 EpollDispatch::Debug => { 1198 // Consume the events. 1199 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1200 // Read from the API receiver channel 1201 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1202 1203 let response = if let Some(ref mut vm) = self.vm { 1204 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1205 } else { 1206 Err(VmError::VmNotRunning) 1207 } 1208 .map_err(gdb::Error::Vm); 1209 1210 gdb_request 1211 .sender 1212 .send(response) 1213 .map_err(Error::GdbResponseSend)?; 1214 } 1215 } 1216 #[cfg(not(feature = "guest_debug"))] 1217 EpollDispatch::Debug => {} 1218 } 1219 } 1220 } 1221 1222 // Trigger the termination of the signal_handler thread 1223 if let Some(signals) = self.signals.take() { 1224 signals.close(); 1225 } 1226 1227 // Wait for all the threads to finish 1228 for thread in self.threads.drain(..) { 1229 thread.join().map_err(Error::ThreadCleanup)? 1230 } 1231 1232 Ok(()) 1233 } 1234 } 1235 1236 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1237 vm_config.lock().unwrap().apply_landlock()?; 1238 Ok(()) 1239 } 1240 1241 impl RequestHandler for Vmm { 1242 fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> { 1243 // We only store the passed VM config. 1244 // The VM will be created when being asked to boot it. 1245 if self.vm_config.is_none() { 1246 self.vm_config = Some(Arc::new(Mutex::new(*config))); 1247 self.console_info = 1248 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1249 1250 if self 1251 .vm_config 1252 .as_ref() 1253 .unwrap() 1254 .lock() 1255 .unwrap() 1256 .landlock_enable 1257 { 1258 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1259 .map_err(VmError::ApplyLandlock)?; 1260 } 1261 Ok(()) 1262 } else { 1263 Err(VmError::VmAlreadyCreated) 1264 } 1265 } 1266 1267 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1268 tracer::start(); 1269 info!("Booting VM"); 1270 event!("vm", "booting"); 1271 let r = { 1272 trace_scoped!("vm_boot"); 1273 // If we don't have a config, we cannot boot a VM. 1274 if self.vm_config.is_none() { 1275 return Err(VmError::VmMissingConfig); 1276 }; 1277 1278 // console_info is set to None in vm_shutdown. re-populate here if empty 1279 if self.console_info.is_none() { 1280 self.console_info = 1281 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1282 } 1283 1284 // Create a new VM if we don't have one yet. 1285 if self.vm.is_none() { 1286 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1287 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1288 #[cfg(feature = "guest_debug")] 1289 let vm_debug_evt = self 1290 .vm_debug_evt 1291 .try_clone() 1292 .map_err(VmError::EventFdClone)?; 1293 let activate_evt = self 1294 .activate_evt 1295 .try_clone() 1296 .map_err(VmError::EventFdClone)?; 1297 1298 if let Some(ref vm_config) = self.vm_config { 1299 let vm = Vm::new( 1300 Arc::clone(vm_config), 1301 exit_evt, 1302 reset_evt, 1303 #[cfg(feature = "guest_debug")] 1304 vm_debug_evt, 1305 &self.seccomp_action, 1306 self.hypervisor.clone(), 1307 activate_evt, 1308 self.console_info.clone(), 1309 self.console_resize_pipe.as_ref().map(Arc::clone), 1310 Arc::clone(&self.original_termios_opt), 1311 None, 1312 None, 1313 None, 1314 )?; 1315 1316 self.vm = Some(vm); 1317 } 1318 } 1319 1320 // Now we can boot the VM. 1321 if let Some(ref mut vm) = self.vm { 1322 vm.boot() 1323 } else { 1324 Err(VmError::VmNotCreated) 1325 } 1326 }; 1327 tracer::end(); 1328 if r.is_ok() { 1329 event!("vm", "booted"); 1330 } 1331 r 1332 } 1333 1334 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1335 if let Some(ref mut vm) = self.vm { 1336 vm.pause().map_err(VmError::Pause) 1337 } else { 1338 Err(VmError::VmNotRunning) 1339 } 1340 } 1341 1342 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1343 if let Some(ref mut vm) = self.vm { 1344 vm.resume().map_err(VmError::Resume) 1345 } else { 1346 Err(VmError::VmNotRunning) 1347 } 1348 } 1349 1350 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1351 if let Some(ref mut vm) = self.vm { 1352 // Drain console_info so that FDs are not reused 1353 let _ = self.console_info.take(); 1354 vm.snapshot() 1355 .map_err(VmError::Snapshot) 1356 .and_then(|snapshot| { 1357 vm.send(&snapshot, destination_url) 1358 .map_err(VmError::SnapshotSend) 1359 }) 1360 } else { 1361 Err(VmError::VmNotRunning) 1362 } 1363 } 1364 1365 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1366 if self.vm.is_some() || self.vm_config.is_some() { 1367 return Err(VmError::VmAlreadyCreated); 1368 } 1369 1370 let source_url = restore_cfg.source_url.as_path().to_str(); 1371 if source_url.is_none() { 1372 return Err(VmError::InvalidRestoreSourceUrl); 1373 } 1374 // Safe to unwrap as we checked it was Some(&str). 1375 let source_url = source_url.unwrap(); 1376 1377 let vm_config = Arc::new(Mutex::new( 1378 recv_vm_config(source_url).map_err(VmError::Restore)?, 1379 )); 1380 restore_cfg 1381 .validate(&vm_config.lock().unwrap().clone()) 1382 .map_err(VmError::ConfigValidation)?; 1383 1384 // Update VM's net configurations with new fds received for restore operation 1385 if let (Some(restored_nets), Some(vm_net_configs)) = 1386 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1387 { 1388 for net in restored_nets.iter() { 1389 for net_config in vm_net_configs.iter_mut() { 1390 // update only if the net dev is backed by FDs 1391 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1392 net_config.fds.clone_from(&net.fds); 1393 } 1394 } 1395 } 1396 } 1397 1398 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1399 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1400 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1401 1402 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1403 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1404 .map_err(VmError::Restore)?; 1405 1406 self.vm_config = Some(Arc::clone(&vm_config)); 1407 1408 // console_info is set to None in vm_snapshot. re-populate here if empty 1409 if self.console_info.is_none() { 1410 self.console_info = 1411 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1412 } 1413 1414 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1415 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1416 #[cfg(feature = "guest_debug")] 1417 let debug_evt = self 1418 .vm_debug_evt 1419 .try_clone() 1420 .map_err(VmError::EventFdClone)?; 1421 let activate_evt = self 1422 .activate_evt 1423 .try_clone() 1424 .map_err(VmError::EventFdClone)?; 1425 1426 let vm = Vm::new( 1427 vm_config, 1428 exit_evt, 1429 reset_evt, 1430 #[cfg(feature = "guest_debug")] 1431 debug_evt, 1432 &self.seccomp_action, 1433 self.hypervisor.clone(), 1434 activate_evt, 1435 self.console_info.clone(), 1436 self.console_resize_pipe.as_ref().map(Arc::clone), 1437 Arc::clone(&self.original_termios_opt), 1438 Some(snapshot), 1439 Some(source_url), 1440 Some(restore_cfg.prefault), 1441 )?; 1442 self.vm = Some(vm); 1443 1444 if self 1445 .vm_config 1446 .as_ref() 1447 .unwrap() 1448 .lock() 1449 .unwrap() 1450 .landlock_enable 1451 { 1452 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1453 .map_err(VmError::ApplyLandlock)?; 1454 } 1455 1456 // Now we can restore the rest of the VM. 1457 if let Some(ref mut vm) = self.vm { 1458 vm.restore() 1459 } else { 1460 Err(VmError::VmNotCreated) 1461 } 1462 } 1463 1464 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1465 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1466 if let Some(ref mut vm) = self.vm { 1467 vm.coredump(destination_url).map_err(VmError::Coredump) 1468 } else { 1469 Err(VmError::VmNotRunning) 1470 } 1471 } 1472 1473 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1474 let r = if let Some(ref mut vm) = self.vm.take() { 1475 // Drain console_info so that the FDs are not reused 1476 let _ = self.console_info.take(); 1477 vm.shutdown() 1478 } else { 1479 Err(VmError::VmNotRunning) 1480 }; 1481 1482 if r.is_ok() { 1483 event!("vm", "shutdown"); 1484 } 1485 1486 r 1487 } 1488 1489 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1490 event!("vm", "rebooting"); 1491 1492 // First we stop the current VM 1493 let config = if let Some(mut vm) = self.vm.take() { 1494 let config = vm.get_config(); 1495 vm.shutdown()?; 1496 config 1497 } else { 1498 return Err(VmError::VmNotCreated); 1499 }; 1500 1501 // vm.shutdown() closes all the console devices, so set console_info to None 1502 // so that the closed FD #s are not reused. 1503 let _ = self.console_info.take(); 1504 1505 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1506 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1507 #[cfg(feature = "guest_debug")] 1508 let debug_evt = self 1509 .vm_debug_evt 1510 .try_clone() 1511 .map_err(VmError::EventFdClone)?; 1512 let activate_evt = self 1513 .activate_evt 1514 .try_clone() 1515 .map_err(VmError::EventFdClone)?; 1516 1517 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1518 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1519 // during the boot process. 1520 if self.reset_evt.read().is_ok() { 1521 warn!("Spurious second reset event received. Ignoring."); 1522 } 1523 1524 self.console_info = 1525 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1526 1527 // Then we create the new VM 1528 let mut vm = Vm::new( 1529 config, 1530 exit_evt, 1531 reset_evt, 1532 #[cfg(feature = "guest_debug")] 1533 debug_evt, 1534 &self.seccomp_action, 1535 self.hypervisor.clone(), 1536 activate_evt, 1537 self.console_info.clone(), 1538 self.console_resize_pipe.as_ref().map(Arc::clone), 1539 Arc::clone(&self.original_termios_opt), 1540 None, 1541 None, 1542 None, 1543 )?; 1544 1545 // And we boot it 1546 vm.boot()?; 1547 1548 self.vm = Some(vm); 1549 1550 event!("vm", "rebooted"); 1551 1552 Ok(()) 1553 } 1554 1555 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1556 match &self.vm_config { 1557 Some(vm_config) => { 1558 let state = match &self.vm { 1559 Some(vm) => vm.get_state()?, 1560 None => VmState::Created, 1561 }; 1562 let config = vm_config.lock().unwrap().clone(); 1563 1564 let mut memory_actual_size = config.memory.total_size(); 1565 if let Some(vm) = &self.vm { 1566 memory_actual_size -= vm.balloon_size(); 1567 } 1568 1569 let device_tree = self 1570 .vm 1571 .as_ref() 1572 .map(|vm| vm.device_tree().lock().unwrap().clone()); 1573 1574 Ok(VmInfoResponse { 1575 config: Box::new(config), 1576 state, 1577 memory_actual_size, 1578 device_tree, 1579 }) 1580 } 1581 None => Err(VmError::VmNotCreated), 1582 } 1583 } 1584 1585 fn vmm_ping(&self) -> VmmPingResponse { 1586 let VmmVersionInfo { 1587 build_version, 1588 version, 1589 } = self.version.clone(); 1590 1591 VmmPingResponse { 1592 build_version, 1593 version, 1594 pid: std::process::id() as i64, 1595 features: feature_list(), 1596 } 1597 } 1598 1599 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1600 if self.vm_config.is_none() { 1601 return Ok(()); 1602 } 1603 1604 // If a VM is booted, we first try to shut it down. 1605 if self.vm.is_some() { 1606 self.vm_shutdown()?; 1607 } 1608 1609 self.vm_config = None; 1610 1611 event!("vm", "deleted"); 1612 1613 Ok(()) 1614 } 1615 1616 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1617 self.vm_delete()?; 1618 event!("vmm", "shutdown"); 1619 Ok(()) 1620 } 1621 1622 fn vm_resize( 1623 &mut self, 1624 desired_vcpus: Option<u8>, 1625 desired_ram: Option<u64>, 1626 desired_balloon: Option<u64>, 1627 ) -> result::Result<(), VmError> { 1628 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1629 1630 if let Some(ref mut vm) = self.vm { 1631 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1632 error!("Error when resizing VM: {:?}", e); 1633 Err(e) 1634 } else { 1635 Ok(()) 1636 } 1637 } else { 1638 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1639 if let Some(desired_vcpus) = desired_vcpus { 1640 config.cpus.boot_vcpus = desired_vcpus; 1641 } 1642 if let Some(desired_ram) = desired_ram { 1643 config.memory.size = desired_ram; 1644 } 1645 if let Some(desired_balloon) = desired_balloon { 1646 if let Some(balloon_config) = &mut config.balloon { 1647 balloon_config.size = desired_balloon; 1648 } 1649 } 1650 Ok(()) 1651 } 1652 } 1653 1654 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1655 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1656 1657 if let Some(ref mut vm) = self.vm { 1658 if let Err(e) = vm.resize_zone(id, desired_ram) { 1659 error!("Error when resizing VM: {:?}", e); 1660 Err(e) 1661 } else { 1662 Ok(()) 1663 } 1664 } else { 1665 // Update VmConfig by setting the new desired ram. 1666 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1667 1668 if let Some(zones) = &mut memory_config.zones { 1669 for zone in zones.iter_mut() { 1670 if zone.id == id { 1671 zone.size = desired_ram; 1672 return Ok(()); 1673 } 1674 } 1675 } 1676 1677 error!("Could not find the memory zone {} for the resize", id); 1678 Err(VmError::ResizeZone) 1679 } 1680 } 1681 1682 fn vm_add_device( 1683 &mut self, 1684 device_cfg: DeviceConfig, 1685 ) -> result::Result<Option<Vec<u8>>, VmError> { 1686 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1687 1688 { 1689 // Validate the configuration change in a cloned configuration 1690 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1691 add_to_config(&mut config.devices, device_cfg.clone()); 1692 config.validate().map_err(VmError::ConfigValidation)?; 1693 } 1694 1695 if let Some(ref mut vm) = self.vm { 1696 let info = vm.add_device(device_cfg).map_err(|e| { 1697 error!("Error when adding new device to the VM: {:?}", e); 1698 e 1699 })?; 1700 serde_json::to_vec(&info) 1701 .map(Some) 1702 .map_err(VmError::SerializeJson) 1703 } else { 1704 // Update VmConfig by adding the new device. 1705 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1706 add_to_config(&mut config.devices, device_cfg); 1707 Ok(None) 1708 } 1709 } 1710 1711 fn vm_add_user_device( 1712 &mut self, 1713 device_cfg: UserDeviceConfig, 1714 ) -> result::Result<Option<Vec<u8>>, VmError> { 1715 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1716 1717 { 1718 // Validate the configuration change in a cloned configuration 1719 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1720 add_to_config(&mut config.user_devices, device_cfg.clone()); 1721 config.validate().map_err(VmError::ConfigValidation)?; 1722 } 1723 1724 if let Some(ref mut vm) = self.vm { 1725 let info = vm.add_user_device(device_cfg).map_err(|e| { 1726 error!("Error when adding new user device to the VM: {:?}", e); 1727 e 1728 })?; 1729 serde_json::to_vec(&info) 1730 .map(Some) 1731 .map_err(VmError::SerializeJson) 1732 } else { 1733 // Update VmConfig by adding the new device. 1734 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1735 add_to_config(&mut config.user_devices, device_cfg); 1736 Ok(None) 1737 } 1738 } 1739 1740 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1741 if let Some(ref mut vm) = self.vm { 1742 if let Err(e) = vm.remove_device(id) { 1743 error!("Error when removing device from the VM: {:?}", e); 1744 Err(e) 1745 } else { 1746 Ok(()) 1747 } 1748 } else if let Some(ref config) = self.vm_config { 1749 let mut config = config.lock().unwrap(); 1750 if config.remove_device(&id) { 1751 Ok(()) 1752 } else { 1753 Err(VmError::NoDeviceToRemove(id)) 1754 } 1755 } else { 1756 Err(VmError::VmNotCreated) 1757 } 1758 } 1759 1760 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1761 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1762 1763 { 1764 // Validate the configuration change in a cloned configuration 1765 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1766 add_to_config(&mut config.disks, disk_cfg.clone()); 1767 config.validate().map_err(VmError::ConfigValidation)?; 1768 } 1769 1770 if let Some(ref mut vm) = self.vm { 1771 let info = vm.add_disk(disk_cfg).map_err(|e| { 1772 error!("Error when adding new disk to the VM: {:?}", e); 1773 e 1774 })?; 1775 serde_json::to_vec(&info) 1776 .map(Some) 1777 .map_err(VmError::SerializeJson) 1778 } else { 1779 // Update VmConfig by adding the new device. 1780 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1781 add_to_config(&mut config.disks, disk_cfg); 1782 Ok(None) 1783 } 1784 } 1785 1786 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1787 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1788 1789 { 1790 // Validate the configuration change in a cloned configuration 1791 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1792 add_to_config(&mut config.fs, fs_cfg.clone()); 1793 config.validate().map_err(VmError::ConfigValidation)?; 1794 } 1795 1796 if let Some(ref mut vm) = self.vm { 1797 let info = vm.add_fs(fs_cfg).map_err(|e| { 1798 error!("Error when adding new fs to the VM: {:?}", e); 1799 e 1800 })?; 1801 serde_json::to_vec(&info) 1802 .map(Some) 1803 .map_err(VmError::SerializeJson) 1804 } else { 1805 // Update VmConfig by adding the new device. 1806 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1807 add_to_config(&mut config.fs, fs_cfg); 1808 Ok(None) 1809 } 1810 } 1811 1812 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1813 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1814 1815 { 1816 // Validate the configuration change in a cloned configuration 1817 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1818 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1819 config.validate().map_err(VmError::ConfigValidation)?; 1820 } 1821 1822 if let Some(ref mut vm) = self.vm { 1823 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1824 error!("Error when adding new pmem device to the VM: {:?}", e); 1825 e 1826 })?; 1827 serde_json::to_vec(&info) 1828 .map(Some) 1829 .map_err(VmError::SerializeJson) 1830 } else { 1831 // Update VmConfig by adding the new device. 1832 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1833 add_to_config(&mut config.pmem, pmem_cfg); 1834 Ok(None) 1835 } 1836 } 1837 1838 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1839 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1840 1841 { 1842 // Validate the configuration change in a cloned configuration 1843 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1844 add_to_config(&mut config.net, net_cfg.clone()); 1845 config.validate().map_err(VmError::ConfigValidation)?; 1846 } 1847 1848 if let Some(ref mut vm) = self.vm { 1849 let info = vm.add_net(net_cfg).map_err(|e| { 1850 error!("Error when adding new network device to the VM: {:?}", e); 1851 e 1852 })?; 1853 serde_json::to_vec(&info) 1854 .map(Some) 1855 .map_err(VmError::SerializeJson) 1856 } else { 1857 // Update VmConfig by adding the new device. 1858 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1859 add_to_config(&mut config.net, net_cfg); 1860 Ok(None) 1861 } 1862 } 1863 1864 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1865 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1866 1867 { 1868 // Validate the configuration change in a cloned configuration 1869 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1870 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1871 config.validate().map_err(VmError::ConfigValidation)?; 1872 } 1873 1874 if let Some(ref mut vm) = self.vm { 1875 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1876 error!("Error when adding new vDPA device to the VM: {:?}", e); 1877 e 1878 })?; 1879 serde_json::to_vec(&info) 1880 .map(Some) 1881 .map_err(VmError::SerializeJson) 1882 } else { 1883 // Update VmConfig by adding the new device. 1884 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1885 add_to_config(&mut config.vdpa, vdpa_cfg); 1886 Ok(None) 1887 } 1888 } 1889 1890 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1891 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1892 1893 { 1894 // Validate the configuration change in a cloned configuration 1895 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1896 1897 if config.vsock.is_some() { 1898 return Err(VmError::TooManyVsockDevices); 1899 } 1900 1901 config.vsock = Some(vsock_cfg.clone()); 1902 config.validate().map_err(VmError::ConfigValidation)?; 1903 } 1904 1905 if let Some(ref mut vm) = self.vm { 1906 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1907 error!("Error when adding new vsock device to the VM: {:?}", e); 1908 e 1909 })?; 1910 serde_json::to_vec(&info) 1911 .map(Some) 1912 .map_err(VmError::SerializeJson) 1913 } else { 1914 // Update VmConfig by adding the new device. 1915 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1916 config.vsock = Some(vsock_cfg); 1917 Ok(None) 1918 } 1919 } 1920 1921 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1922 if let Some(ref mut vm) = self.vm { 1923 let info = vm.counters().map_err(|e| { 1924 error!("Error when getting counters from the VM: {:?}", e); 1925 e 1926 })?; 1927 serde_json::to_vec(&info) 1928 .map(Some) 1929 .map_err(VmError::SerializeJson) 1930 } else { 1931 Err(VmError::VmNotRunning) 1932 } 1933 } 1934 1935 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1936 if let Some(ref mut vm) = self.vm { 1937 vm.power_button() 1938 } else { 1939 Err(VmError::VmNotRunning) 1940 } 1941 } 1942 1943 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 1944 if let Some(ref mut vm) = self.vm { 1945 vm.nmi() 1946 } else { 1947 Err(VmError::VmNotRunning) 1948 } 1949 } 1950 1951 fn vm_receive_migration( 1952 &mut self, 1953 receive_data_migration: VmReceiveMigrationData, 1954 ) -> result::Result<(), MigratableError> { 1955 info!( 1956 "Receiving migration: receiver_url = {}", 1957 receive_data_migration.receiver_url 1958 ); 1959 1960 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1961 let listener = UnixListener::bind(&path).map_err(|e| { 1962 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1963 })?; 1964 let (mut socket, _addr) = listener.accept().map_err(|e| { 1965 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1966 })?; 1967 std::fs::remove_file(&path).map_err(|e| { 1968 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1969 })?; 1970 1971 let mut started = false; 1972 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1973 let mut existing_memory_files = None; 1974 loop { 1975 let req = Request::read_from(&mut socket)?; 1976 match req.command() { 1977 Command::Invalid => info!("Invalid Command Received"), 1978 Command::Start => { 1979 info!("Start Command Received"); 1980 started = true; 1981 1982 Response::ok().write_to(&mut socket)?; 1983 } 1984 Command::Config => { 1985 info!("Config Command Received"); 1986 1987 if !started { 1988 warn!("Migration not started yet"); 1989 Response::error().write_to(&mut socket)?; 1990 continue; 1991 } 1992 memory_manager = Some(self.vm_receive_config( 1993 &req, 1994 &mut socket, 1995 existing_memory_files.take(), 1996 )?); 1997 } 1998 Command::State => { 1999 info!("State Command Received"); 2000 2001 if !started { 2002 warn!("Migration not started yet"); 2003 Response::error().write_to(&mut socket)?; 2004 continue; 2005 } 2006 if let Some(mm) = memory_manager.take() { 2007 self.vm_receive_state(&req, &mut socket, mm)?; 2008 } else { 2009 warn!("Configuration not sent yet"); 2010 Response::error().write_to(&mut socket)?; 2011 } 2012 } 2013 Command::Memory => { 2014 info!("Memory Command Received"); 2015 2016 if !started { 2017 warn!("Migration not started yet"); 2018 Response::error().write_to(&mut socket)?; 2019 continue; 2020 } 2021 if let Some(mm) = memory_manager.as_ref() { 2022 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2023 } else { 2024 warn!("Configuration not sent yet"); 2025 Response::error().write_to(&mut socket)?; 2026 } 2027 } 2028 Command::MemoryFd => { 2029 info!("MemoryFd Command Received"); 2030 2031 if !started { 2032 warn!("Migration not started yet"); 2033 Response::error().write_to(&mut socket)?; 2034 continue; 2035 } 2036 2037 let mut buf = [0u8; 4]; 2038 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 2039 MigratableError::MigrateReceive(anyhow!( 2040 "Error receiving slot from socket: {}", 2041 e 2042 )) 2043 })?; 2044 2045 if existing_memory_files.is_none() { 2046 existing_memory_files = Some(HashMap::default()) 2047 } 2048 2049 if let Some(ref mut existing_memory_files) = existing_memory_files { 2050 let slot = u32::from_le_bytes(buf); 2051 existing_memory_files.insert(slot, file.unwrap()); 2052 } 2053 2054 Response::ok().write_to(&mut socket)?; 2055 } 2056 Command::Complete => { 2057 info!("Complete Command Received"); 2058 if let Some(ref mut vm) = self.vm.as_mut() { 2059 vm.resume()?; 2060 Response::ok().write_to(&mut socket)?; 2061 } else { 2062 warn!("VM not created yet"); 2063 Response::error().write_to(&mut socket)?; 2064 } 2065 break; 2066 } 2067 Command::Abandon => { 2068 info!("Abandon Command Received"); 2069 self.vm = None; 2070 self.vm_config = None; 2071 Response::ok().write_to(&mut socket).ok(); 2072 break; 2073 } 2074 } 2075 } 2076 2077 Ok(()) 2078 } 2079 2080 fn vm_send_migration( 2081 &mut self, 2082 send_data_migration: VmSendMigrationData, 2083 ) -> result::Result<(), MigratableError> { 2084 info!( 2085 "Sending migration: destination_url = {}, local = {}", 2086 send_data_migration.destination_url, send_data_migration.local 2087 ); 2088 2089 if !self 2090 .vm_config 2091 .as_ref() 2092 .unwrap() 2093 .lock() 2094 .unwrap() 2095 .backed_by_shared_memory() 2096 && send_data_migration.local 2097 { 2098 return Err(MigratableError::MigrateSend(anyhow!( 2099 "Local migration requires shared memory or hugepages enabled" 2100 ))); 2101 } 2102 2103 if let Some(vm) = self.vm.as_mut() { 2104 Self::send_migration( 2105 vm, 2106 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2107 self.hypervisor.clone(), 2108 send_data_migration, 2109 ) 2110 .map_err(|migration_err| { 2111 error!("Migration failed: {:?}", migration_err); 2112 2113 // Stop logging dirty pages 2114 if let Err(e) = vm.stop_dirty_log() { 2115 return e; 2116 } 2117 2118 if vm.get_state().unwrap() == VmState::Paused { 2119 if let Err(e) = vm.resume() { 2120 return e; 2121 } 2122 } 2123 2124 migration_err 2125 })?; 2126 2127 // Shutdown the VM after the migration succeeded 2128 self.exit_evt.write(1).map_err(|e| { 2129 MigratableError::MigrateSend(anyhow!( 2130 "Failed shutting down the VM after migration: {:?}", 2131 e 2132 )) 2133 }) 2134 } else { 2135 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2136 } 2137 } 2138 } 2139 2140 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2141 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2142 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2143 2144 #[cfg(test)] 2145 mod unit_tests { 2146 use config::{ 2147 ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig, 2148 RngConfig, 2149 }; 2150 2151 use super::*; 2152 #[cfg(target_arch = "x86_64")] 2153 use crate::config::DebugConsoleConfig; 2154 2155 fn create_dummy_vmm() -> Vmm { 2156 Vmm::new( 2157 VmmVersionInfo::new("dummy", "dummy"), 2158 EventFd::new(EFD_NONBLOCK).unwrap(), 2159 #[cfg(feature = "guest_debug")] 2160 EventFd::new(EFD_NONBLOCK).unwrap(), 2161 #[cfg(feature = "guest_debug")] 2162 EventFd::new(EFD_NONBLOCK).unwrap(), 2163 SeccompAction::Allow, 2164 hypervisor::new().unwrap(), 2165 EventFd::new(EFD_NONBLOCK).unwrap(), 2166 ) 2167 .unwrap() 2168 } 2169 2170 fn create_dummy_vm_config() -> Box<VmConfig> { 2171 Box::new(VmConfig { 2172 cpus: CpusConfig { 2173 boot_vcpus: 1, 2174 max_vcpus: 1, 2175 topology: None, 2176 kvm_hyperv: false, 2177 max_phys_bits: 46, 2178 affinity: None, 2179 features: config::CpuFeatures::default(), 2180 }, 2181 memory: MemoryConfig { 2182 size: 536_870_912, 2183 mergeable: false, 2184 hotplug_method: HotplugMethod::Acpi, 2185 hotplug_size: None, 2186 hotplugged_size: None, 2187 shared: true, 2188 hugepages: false, 2189 hugepage_size: None, 2190 prefault: false, 2191 zones: None, 2192 thp: true, 2193 }, 2194 payload: Some(PayloadConfig { 2195 kernel: Some(PathBuf::from("/path/to/kernel")), 2196 firmware: None, 2197 cmdline: None, 2198 initramfs: None, 2199 #[cfg(feature = "igvm")] 2200 igvm: None, 2201 #[cfg(feature = "sev_snp")] 2202 host_data: None, 2203 }), 2204 rate_limit_groups: None, 2205 disks: None, 2206 net: None, 2207 rng: RngConfig { 2208 src: PathBuf::from("/dev/urandom"), 2209 iommu: false, 2210 }, 2211 balloon: None, 2212 fs: None, 2213 pmem: None, 2214 serial: ConsoleConfig { 2215 file: None, 2216 mode: ConsoleOutputMode::Null, 2217 iommu: false, 2218 socket: None, 2219 }, 2220 console: ConsoleConfig { 2221 file: None, 2222 mode: ConsoleOutputMode::Tty, 2223 iommu: false, 2224 socket: None, 2225 }, 2226 #[cfg(target_arch = "x86_64")] 2227 debug_console: DebugConsoleConfig::default(), 2228 devices: None, 2229 user_devices: None, 2230 vdpa: None, 2231 vsock: None, 2232 #[cfg(feature = "pvmemcontrol")] 2233 pvmemcontrol: None, 2234 pvpanic: false, 2235 iommu: false, 2236 #[cfg(target_arch = "x86_64")] 2237 sgx_epc: None, 2238 numa: None, 2239 watchdog: false, 2240 #[cfg(feature = "guest_debug")] 2241 gdb: false, 2242 pci_segments: None, 2243 platform: None, 2244 tpm: None, 2245 preserved_fds: None, 2246 landlock_enable: false, 2247 landlock_rules: None, 2248 }) 2249 } 2250 2251 #[test] 2252 fn test_vmm_vm_create() { 2253 let mut vmm = create_dummy_vmm(); 2254 let config = create_dummy_vm_config(); 2255 2256 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2257 assert!(matches!( 2258 vmm.vm_create(config), 2259 Err(VmError::VmAlreadyCreated) 2260 )); 2261 } 2262 2263 #[test] 2264 fn test_vmm_vm_cold_add_device() { 2265 let mut vmm = create_dummy_vmm(); 2266 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2267 2268 assert!(matches!( 2269 vmm.vm_add_device(device_config.clone()), 2270 Err(VmError::VmNotCreated) 2271 )); 2272 2273 let _ = vmm.vm_create(create_dummy_vm_config()); 2274 assert!(vmm 2275 .vm_config 2276 .as_ref() 2277 .unwrap() 2278 .lock() 2279 .unwrap() 2280 .devices 2281 .is_none()); 2282 2283 let result = vmm.vm_add_device(device_config.clone()); 2284 assert!(result.is_ok()); 2285 assert!(result.unwrap().is_none()); 2286 assert_eq!( 2287 vmm.vm_config 2288 .as_ref() 2289 .unwrap() 2290 .lock() 2291 .unwrap() 2292 .devices 2293 .clone() 2294 .unwrap() 2295 .len(), 2296 1 2297 ); 2298 assert_eq!( 2299 vmm.vm_config 2300 .as_ref() 2301 .unwrap() 2302 .lock() 2303 .unwrap() 2304 .devices 2305 .clone() 2306 .unwrap()[0], 2307 device_config 2308 ); 2309 } 2310 2311 #[test] 2312 fn test_vmm_vm_cold_add_user_device() { 2313 let mut vmm = create_dummy_vmm(); 2314 let user_device_config = 2315 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2316 2317 assert!(matches!( 2318 vmm.vm_add_user_device(user_device_config.clone()), 2319 Err(VmError::VmNotCreated) 2320 )); 2321 2322 let _ = vmm.vm_create(create_dummy_vm_config()); 2323 assert!(vmm 2324 .vm_config 2325 .as_ref() 2326 .unwrap() 2327 .lock() 2328 .unwrap() 2329 .user_devices 2330 .is_none()); 2331 2332 let result = vmm.vm_add_user_device(user_device_config.clone()); 2333 assert!(result.is_ok()); 2334 assert!(result.unwrap().is_none()); 2335 assert_eq!( 2336 vmm.vm_config 2337 .as_ref() 2338 .unwrap() 2339 .lock() 2340 .unwrap() 2341 .user_devices 2342 .clone() 2343 .unwrap() 2344 .len(), 2345 1 2346 ); 2347 assert_eq!( 2348 vmm.vm_config 2349 .as_ref() 2350 .unwrap() 2351 .lock() 2352 .unwrap() 2353 .user_devices 2354 .clone() 2355 .unwrap()[0], 2356 user_device_config 2357 ); 2358 } 2359 2360 #[test] 2361 fn test_vmm_vm_cold_add_disk() { 2362 let mut vmm = create_dummy_vmm(); 2363 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2364 2365 assert!(matches!( 2366 vmm.vm_add_disk(disk_config.clone()), 2367 Err(VmError::VmNotCreated) 2368 )); 2369 2370 let _ = vmm.vm_create(create_dummy_vm_config()); 2371 assert!(vmm 2372 .vm_config 2373 .as_ref() 2374 .unwrap() 2375 .lock() 2376 .unwrap() 2377 .disks 2378 .is_none()); 2379 2380 let result = vmm.vm_add_disk(disk_config.clone()); 2381 assert!(result.is_ok()); 2382 assert!(result.unwrap().is_none()); 2383 assert_eq!( 2384 vmm.vm_config 2385 .as_ref() 2386 .unwrap() 2387 .lock() 2388 .unwrap() 2389 .disks 2390 .clone() 2391 .unwrap() 2392 .len(), 2393 1 2394 ); 2395 assert_eq!( 2396 vmm.vm_config 2397 .as_ref() 2398 .unwrap() 2399 .lock() 2400 .unwrap() 2401 .disks 2402 .clone() 2403 .unwrap()[0], 2404 disk_config 2405 ); 2406 } 2407 2408 #[test] 2409 fn test_vmm_vm_cold_add_fs() { 2410 let mut vmm = create_dummy_vmm(); 2411 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2412 2413 assert!(matches!( 2414 vmm.vm_add_fs(fs_config.clone()), 2415 Err(VmError::VmNotCreated) 2416 )); 2417 2418 let _ = vmm.vm_create(create_dummy_vm_config()); 2419 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2420 2421 let result = vmm.vm_add_fs(fs_config.clone()); 2422 assert!(result.is_ok()); 2423 assert!(result.unwrap().is_none()); 2424 assert_eq!( 2425 vmm.vm_config 2426 .as_ref() 2427 .unwrap() 2428 .lock() 2429 .unwrap() 2430 .fs 2431 .clone() 2432 .unwrap() 2433 .len(), 2434 1 2435 ); 2436 assert_eq!( 2437 vmm.vm_config 2438 .as_ref() 2439 .unwrap() 2440 .lock() 2441 .unwrap() 2442 .fs 2443 .clone() 2444 .unwrap()[0], 2445 fs_config 2446 ); 2447 } 2448 2449 #[test] 2450 fn test_vmm_vm_cold_add_pmem() { 2451 let mut vmm = create_dummy_vmm(); 2452 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2453 2454 assert!(matches!( 2455 vmm.vm_add_pmem(pmem_config.clone()), 2456 Err(VmError::VmNotCreated) 2457 )); 2458 2459 let _ = vmm.vm_create(create_dummy_vm_config()); 2460 assert!(vmm 2461 .vm_config 2462 .as_ref() 2463 .unwrap() 2464 .lock() 2465 .unwrap() 2466 .pmem 2467 .is_none()); 2468 2469 let result = vmm.vm_add_pmem(pmem_config.clone()); 2470 assert!(result.is_ok()); 2471 assert!(result.unwrap().is_none()); 2472 assert_eq!( 2473 vmm.vm_config 2474 .as_ref() 2475 .unwrap() 2476 .lock() 2477 .unwrap() 2478 .pmem 2479 .clone() 2480 .unwrap() 2481 .len(), 2482 1 2483 ); 2484 assert_eq!( 2485 vmm.vm_config 2486 .as_ref() 2487 .unwrap() 2488 .lock() 2489 .unwrap() 2490 .pmem 2491 .clone() 2492 .unwrap()[0], 2493 pmem_config 2494 ); 2495 } 2496 2497 #[test] 2498 fn test_vmm_vm_cold_add_net() { 2499 let mut vmm = create_dummy_vmm(); 2500 let net_config = NetConfig::parse( 2501 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2502 ) 2503 .unwrap(); 2504 2505 assert!(matches!( 2506 vmm.vm_add_net(net_config.clone()), 2507 Err(VmError::VmNotCreated) 2508 )); 2509 2510 let _ = vmm.vm_create(create_dummy_vm_config()); 2511 assert!(vmm 2512 .vm_config 2513 .as_ref() 2514 .unwrap() 2515 .lock() 2516 .unwrap() 2517 .net 2518 .is_none()); 2519 2520 let result = vmm.vm_add_net(net_config.clone()); 2521 assert!(result.is_ok()); 2522 assert!(result.unwrap().is_none()); 2523 assert_eq!( 2524 vmm.vm_config 2525 .as_ref() 2526 .unwrap() 2527 .lock() 2528 .unwrap() 2529 .net 2530 .clone() 2531 .unwrap() 2532 .len(), 2533 1 2534 ); 2535 assert_eq!( 2536 vmm.vm_config 2537 .as_ref() 2538 .unwrap() 2539 .lock() 2540 .unwrap() 2541 .net 2542 .clone() 2543 .unwrap()[0], 2544 net_config 2545 ); 2546 } 2547 2548 #[test] 2549 fn test_vmm_vm_cold_add_vdpa() { 2550 let mut vmm = create_dummy_vmm(); 2551 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2552 2553 assert!(matches!( 2554 vmm.vm_add_vdpa(vdpa_config.clone()), 2555 Err(VmError::VmNotCreated) 2556 )); 2557 2558 let _ = vmm.vm_create(create_dummy_vm_config()); 2559 assert!(vmm 2560 .vm_config 2561 .as_ref() 2562 .unwrap() 2563 .lock() 2564 .unwrap() 2565 .vdpa 2566 .is_none()); 2567 2568 let result = vmm.vm_add_vdpa(vdpa_config.clone()); 2569 assert!(result.is_ok()); 2570 assert!(result.unwrap().is_none()); 2571 assert_eq!( 2572 vmm.vm_config 2573 .as_ref() 2574 .unwrap() 2575 .lock() 2576 .unwrap() 2577 .vdpa 2578 .clone() 2579 .unwrap() 2580 .len(), 2581 1 2582 ); 2583 assert_eq!( 2584 vmm.vm_config 2585 .as_ref() 2586 .unwrap() 2587 .lock() 2588 .unwrap() 2589 .vdpa 2590 .clone() 2591 .unwrap()[0], 2592 vdpa_config 2593 ); 2594 } 2595 2596 #[test] 2597 fn test_vmm_vm_cold_add_vsock() { 2598 let mut vmm = create_dummy_vmm(); 2599 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2600 2601 assert!(matches!( 2602 vmm.vm_add_vsock(vsock_config.clone()), 2603 Err(VmError::VmNotCreated) 2604 )); 2605 2606 let _ = vmm.vm_create(create_dummy_vm_config()); 2607 assert!(vmm 2608 .vm_config 2609 .as_ref() 2610 .unwrap() 2611 .lock() 2612 .unwrap() 2613 .vsock 2614 .is_none()); 2615 2616 let result = vmm.vm_add_vsock(vsock_config.clone()); 2617 assert!(result.is_ok()); 2618 assert!(result.unwrap().is_none()); 2619 assert_eq!( 2620 vmm.vm_config 2621 .as_ref() 2622 .unwrap() 2623 .lock() 2624 .unwrap() 2625 .vsock 2626 .clone() 2627 .unwrap(), 2628 vsock_config 2629 ); 2630 } 2631 } 2632