// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

#[macro_use]
extern crate event_monitor;
#[macro_use]
extern crate log;

use crate::api::{
    ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
    VmSendMigrationData, VmmPingResponse,
};
use crate::config::{
    add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig,
    UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
};
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use crate::coredump::GuestDebuggable;
use crate::memory_manager::MemoryManager;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
use crate::migration::get_vm_snapshot;
use crate::migration::{recv_vm_config, recv_vm_state};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::vm::{Error as VmError, Vm, VmState};
use anyhow::anyhow;
#[cfg(feature = "dbus_api")]
use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
use api::http::HttpApiHandle;
use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
use memory_manager::MemoryManagerSnapshotData;
use pci::PciBdf;
use seccompiler::{apply_filter, SeccompAction};
use serde::ser::{SerializeStruct, Serializer};
use serde::{Deserialize, Serialize};
use signal_hook::iterator::{Handle, Signals};
use std::collections::HashMap;
use std::fs::File;
use std::io;
use std::io::{stdout, Read, Write};
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::os::unix::net::UnixListener;
use std::os::unix::net::UnixStream;
use std::panic::AssertUnwindSafe;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::{result, thread};
use thiserror::Error;
use tracer::trace_scoped;
use vm_memory::bitmap::AtomicBitmap;
use vm_memory::{ReadVolatile, WriteVolatile};
use vm_migration::{protocol::*, Migratable};
use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::signal::unblock_signal;
use vmm_sys_util::sock_ctrl_msg::ScmSocket;

mod acpi;
pub mod api;
mod clone3;
pub mod config;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
mod coredump;
pub mod cpu;
pub mod device_manager;
pub mod device_tree;
#[cfg(feature = "guest_debug")]
mod gdb;
#[cfg(feature = "igvm")]
mod igvm;
pub mod interrupt;
pub mod memory_manager;
pub mod migration;
mod pci_segment;
pub mod seccomp_filters;
mod serial_manager;
mod sigwinch_listener;
pub mod vm;
pub mod vm_config;

type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;

/// Errors associated with VMM management
#[derive(Debug, Error)]
pub enum Error {
    /// API request receive error
    #[error("Error receiving API request: {0}")]
    ApiRequestRecv(#[source] RecvError),

    /// API response send error
    #[error("Error sending API response: {0}")]
    ApiResponseSend(#[source] SendError<ApiResponse>),

    /// Cannot bind to the UNIX domain socket path
    #[error("Error binding to UNIX domain socket: {0}")]
    Bind(#[source] io::Error),

    /// Cannot clone EventFd.
    #[error("Error cloning EventFd: {0}")]
    EventFdClone(#[source] io::Error),

    /// Cannot create EventFd.
    #[error("Error creating EventFd: {0}")]
    EventFdCreate(#[source] io::Error),

    /// Cannot read from EventFd.
    #[error("Error reading from EventFd: {0}")]
    EventFdRead(#[source] io::Error),

    /// Cannot create epoll context.
    #[error("Error creating epoll context: {0}")]
    Epoll(#[source] io::Error),

    /// Cannot create HTTP thread
    #[error("Error spawning HTTP thread: {0}")]
    HttpThreadSpawn(#[source] io::Error),

    /// Cannot create D-Bus thread
    #[cfg(feature = "dbus_api")]
    #[error("Error spawning D-Bus thread: {0}")]
    DBusThreadSpawn(#[source] io::Error),

    /// Cannot start D-Bus session
    #[cfg(feature = "dbus_api")]
    #[error("Error starting D-Bus session: {0}")]
    CreateDBusSession(#[source] zbus::Error),

    /// Cannot create `event-monitor` thread
    #[error("Error spawning `event-monitor` thread: {0}")]
    EventMonitorThreadSpawn(#[source] io::Error),

    /// Cannot handle the VM STDIN stream
    #[error("Error handling VM stdin: {0:?}")]
    Stdin(VmError),

    /// Cannot handle the VM pty stream
    #[error("Error handling VM pty: {0:?}")]
    Pty(VmError),

    /// Cannot reboot the VM
    #[error("Error rebooting VM: {0:?}")]
    VmReboot(VmError),

    /// Cannot create VMM thread
    #[error("Error spawning VMM thread: {0:?}")]
    VmmThreadSpawn(#[source] io::Error),

    /// Cannot shut the VMM down
    #[error("Error shutting down VMM: {0:?}")]
    VmmShutdown(VmError),

    /// Cannot create seccomp filter
    #[error("Error creating seccomp filter: {0}")]
    CreateSeccompFilter(seccompiler::Error),

    /// Cannot apply seccomp filter
    #[error("Error applying seccomp filter: {0}")]
    ApplySeccompFilter(seccompiler::Error),

    /// Error activating virtio devices
    #[error("Error activating virtio devices: {0:?}")]
    ActivateVirtioDevices(VmError),

    /// Error creating API server
    #[error("Error creating API server: {0:?}")]
    CreateApiServer(micro_http::ServerError),

    /// Error binding API server socket
    #[error("Error creating API server socket: {0:?}")]
    CreateApiServerSocket(#[source] io::Error),

    #[cfg(feature = "guest_debug")]
    #[error("Failed to start the GDB thread: {0}")]
    GdbThreadSpawn(io::Error),

    /// GDB request receive error
    #[cfg(feature = "guest_debug")]
    #[error("Error receiving GDB request: {0}")]
    GdbRequestRecv(#[source] RecvError),

    /// GDB response send error
    #[cfg(feature = "guest_debug")]
    #[error("Error sending GDB response: {0}")]
    GdbResponseSend(#[source] SendError<gdb::GdbResponse>),

    #[error("Cannot spawn a signal handler thread: {0}")]
    SignalHandlerSpawn(#[source] io::Error),

    #[error("Failed to join on threads: {0:?}")]
    ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
}
pub type Result<T> = result::Result<T, Error>;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    Unknown,
}

impl From<u64> for EpollDispatch {
    fn from(v: u64) -> Self {
        use EpollDispatch::*;
        match v {
            0 => Exit,
            1 => Reset,
            2 => Api,
            3 => ActivateVirtioDevices,
            4 => Debug,
            _ => Unknown,
        }
    }
}

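/// Wraps the epoll file descriptor behind the VMM control loop and maps each
/// registered event source to an `EpollDispatch` token.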
pub struct EpollContext {
    epoll_file: File,
}

impl EpollContext {
    pub fn new() -> result::Result<EpollContext, io::Error> {
        let epoll_fd = epoll::create(true)?;
        // Use 'File' to enforce closing on 'epoll_fd'
        // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
        let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };

        Ok(EpollContext { epoll_file })
    }

    pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
    where
        T: AsRawFd,
    {
        let dispatch_index = token as u64;
        epoll::ctl(
            self.epoll_file.as_raw_fd(),
            epoll::ControlOptions::EPOLL_CTL_ADD,
            fd.as_raw_fd(),
            epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
        )?;

        Ok(())
    }

    #[cfg(fuzzing)]
    pub fn add_event_custom<T>(
        &mut self,
        fd: &T,
        id: u64,
        evts: epoll::Events,
    ) -> result::Result<(), io::Error>
    where
        T: AsRawFd,
    {
        epoll::ctl(
            self.epoll_file.as_raw_fd(),
            epoll::ControlOptions::EPOLL_CTL_ADD,
            fd.as_raw_fd(),
            epoll::Event::new(evts, id),
        )?;

        Ok(())
    }
}

impl AsRawFd for EpollContext {
    fn as_raw_fd(&self) -> RawFd {
        self.epoll_file.as_raw_fd()
    }
}

pub struct PciDeviceInfo {
    pub id: String,
    pub bdf: PciBdf,
}

impl Serialize for PciDeviceInfo {
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bdf_str = self.bdf.to_string();

        // Serialize the structure.
        let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
        state.serialize_field("id", &self.id)?;
        state.serialize_field("bdf", &bdf_str)?;
        state.end()
    }
}

pub fn feature_list() -> Vec<String> {
    vec![
        #[cfg(feature = "dbus_api")]
        "dbus_api".to_string(),
        #[cfg(feature = "dhat-heap")]
        "dhat-heap".to_string(),
        #[cfg(feature = "guest_debug")]
        "guest_debug".to_string(),
        #[cfg(feature = "igvm")]
        "igvm".to_string(),
        #[cfg(feature = "io_uring")]
        "io_uring".to_string(),
        #[cfg(feature = "kvm")]
        "kvm".to_string(),
        #[cfg(feature = "mshv")]
        "mshv".to_string(),
        #[cfg(feature = "sev_snp")]
        "sev_snp".to_string(),
        #[cfg(feature = "tdx")]
        "tdx".to_string(),
        #[cfg(feature = "tracing")]
        "tracing".to_string(),
    ]
}

pub fn start_event_monitor_thread(
    mut monitor: event_monitor::Monitor,
    seccomp_action: &SeccompAction,
    hypervisor_type: hypervisor::HypervisorType,
    exit_event: EventFd,
) -> Result<thread::JoinHandle<Result<()>>> {
    // Retrieve seccomp filter
    let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    thread::Builder::new()
        .name("event-monitor".to_owned())
        .spawn(move || {
            // Apply seccomp filter
            if !seccomp_filter.is_empty() {
                apply_filter(&seccomp_filter)
                    .map_err(Error::ApplySeccompFilter)
                    .map_err(|e| {
                        error!("Error applying seccomp filter: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }

            std::panic::catch_unwind(AssertUnwindSafe(move || {
                while let Ok(event) = monitor.rx.recv() {
                    let event = Arc::new(event);

                    if let Some(ref mut file) = monitor.file {
                        file.write_all(event.as_bytes().as_ref()).ok();
                        file.write_all(b"\n\n").ok();
                    }

                    for tx in monitor.broadcast.iter() {
                        tx.send(event.clone()).ok();
                    }
                }
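                // recv() only fails once every event sender has been dropped,
                // which is the signal to wind this thread down.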
            }))
            .map_err(|_| {
                error!("`event-monitor` thread panicked");
                exit_event.write(1).ok();
            })
            .ok();

            Ok(())
        })
        .map_err(Error::EventMonitorThreadSpawn)
}

#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
) -> Result<VmmThreadHandle> {
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler()?;

                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started; we can now start the D-Bus thread
    // and start serving HTTP requests.
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
        )?)
    } else {
        None
    };

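    // If a debug path was provided, serve the GDB stub from its own thread so
    // guest debugging does not block the VMM control loop.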
    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}

#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    vm_config: Arc<Mutex<VmConfig>>,
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    memory_manager_data: MemoryManagerSnapshotData,
}

#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    pub fn new(build_version: &str, version: &str) -> Self {
        Self {
            build_version: build_version.to_owned(),
            version: version.to_owned(),
        }
    }
}

pub struct VmmThreadHandle {
    pub thread_handle: thread::JoinHandle<Result<()>>,
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    pub http_api_handle: Option<HttpApiHandle>,
}

pub struct Vmm {
    epoll: EpollContext,
    exit_evt: EventFd,
    reset_evt: EventFd,
    api_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    version: VmmVersionInfo,
    vm: Option<Vm>,
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    activate_evt: EventFd,
    signals: Option<Handle>,
    threads: Vec<thread::JoinHandle<()>>,
    original_termios_opt: Arc<Mutex<Option<termios>>>,
}

impl Vmm {
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];

    fn signal_handler(
        mut signals: Signals,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
        exit_evt: &EventFd,
    ) {
        for sig in &Self::HANDLED_SIGNALS {
            unblock_signal(*sig).unwrap();
        }

        for signal in signals.forever() {
            match signal {
                SIGTERM | SIGINT => {
                    if exit_evt.write(1).is_err() {
                        // Resetting the terminal is usually done as the VMM exits
                        if let Ok(lock) = original_termios_opt.lock() {
                            if let Some(termios) = *lock {
                                // SAFETY: FFI call
                                let _ = unsafe {
                                    tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
                                };
                            }
                        } else {
                            warn!("Failed to lock original termios");
                        }

                        std::process::exit(1);
                    }
                }
                _ => (),
            }
        }
    }

    fn setup_signal_handler(&mut self) -> Result<()> {
        let signals = Signals::new(Self::HANDLED_SIGNALS);
        match signals {
            Ok(signals) => {
                self.signals = Some(signals.handle());
                let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
                let original_termios_opt = Arc::clone(&self.original_termios_opt);

                let signal_handler_seccomp_filter = get_seccomp_filter(
                    &self.seccomp_action,
                    Thread::SignalHandler,
                    self.hypervisor.hypervisor_type(),
                )
                .map_err(Error::CreateSeccompFilter)?;
                self.threads.push(
                    thread::Builder::new()
                        .name("vmm_signal_handler".to_string())
                        .spawn(move || {
                            if !signal_handler_seccomp_filter.is_empty() {
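                                // seccomp filters only apply to the thread
                                // that installs them, so this has to run on
                                // the signal handler thread itself.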
                                if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
                                    .map_err(Error::ApplySeccompFilter)
                                {
                                    error!("Error applying seccomp filter: {:?}", e);
                                    exit_evt.write(1).ok();
                                    return;
                                }
                            }
                            std::panic::catch_unwind(AssertUnwindSafe(|| {
                                Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
                            }))
                            .map_err(|_| {
                                error!("vmm signal_handler thread panicked");
                                exit_evt.write(1).ok()
                            })
                            .ok();
                        })
                        .map_err(Error::SignalHandlerSpawn)?,
                );
            }
            Err(e) => error!("Failed to register signal handlers: {}", e),
        }
        Ok(())
    }

    fn new(
        vmm_version: VmmVersionInfo,
        api_evt: EventFd,
        #[cfg(feature = "guest_debug")] debug_evt: EventFd,
        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
        seccomp_action: SeccompAction,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        exit_evt: EventFd,
    ) -> Result<Self> {
        let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
        let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
        let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;

        epoll
            .add_event(&exit_evt, EpollDispatch::Exit)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&reset_evt, EpollDispatch::Reset)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&api_evt, EpollDispatch::Api)
            .map_err(Error::Epoll)?;

        #[cfg(feature = "guest_debug")]
        epoll
            .add_event(&debug_evt, EpollDispatch::Debug)
            .map_err(Error::Epoll)?;

        Ok(Vmm {
            epoll,
            exit_evt,
            reset_evt,
            api_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            #[cfg(feature = "guest_debug")]
            vm_debug_evt,
            version: vmm_version,
            vm: None,
            vm_config: None,
            seccomp_action,
            hypervisor,
            activate_evt,
            signals: None,
            threads: vec![],
            original_termios_opt: Arc::new(Mutex::new(None)),
        })
    }

    fn vm_receive_config<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        existing_memory_files: Option<HashMap<u32, File>>,
    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
    where
        T: Read + Write,
    {
        // Read in config data along with memory manager data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;

        let vm_migration_config: VmMigrationConfig =
            serde_json::from_slice(&data).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
            })?;

        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(
            &vm_migration_config.vm_config,
            &vm_migration_config.common_cpuid,
        )?;

        let config = vm_migration_config.vm_config.clone();
        self.vm_config = Some(vm_migration_config.vm_config);

        let vm = Vm::create_hypervisor_vm(
            &self.hypervisor,
            #[cfg(feature = "tdx")]
            false,
            #[cfg(feature = "sev_snp")]
            false,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating hypervisor VM from snapshot: {:?}",
                e
            ))
        })?;

        let phys_bits =
            vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);

        let memory_manager = MemoryManager::new(
            vm,
            &config.lock().unwrap().memory.clone(),
            None,
            phys_bits,
            #[cfg(feature = "tdx")]
            false,
            Some(&vm_migration_config.memory_manager_data),
            existing_memory_files,
            #[cfg(target_arch = "x86_64")]
            None,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating MemoryManager from snapshot: {:?}",
                e
            ))
        })?;

        Response::ok().write_to(socket)?;

        Ok(memory_manager)
    }

    fn vm_receive_state<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        mm: Arc<Mutex<MemoryManager>>,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + Write,
    {
        // Read in state data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;
        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
        })?;

        let exit_evt = self.exit_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
        })?;
        let reset_evt = self.reset_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
        })?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
        })?;
        let activate_evt = self.activate_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
        })?;

        let timestamp = Instant::now();
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let mut vm = Vm::new_from_memory_manager(
            self.vm_config.clone().unwrap(),
            mm,
            hypervisor_vm,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            timestamp,
            None,
            None,
            None,
            None,
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
        })?;

        // Restore the VM from the snapshot.
        vm.restore().map_err(|e| {
            Response::error().write_to(socket).ok();
            MigratableError::MigrateReceive(anyhow!("Failed restoring the VM: {}", e))
        })?;
        self.vm = Some(vm);

        Response::ok().write_to(socket)?;

        Ok(())
    }

    fn vm_receive_memory<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        memory_manager: &mut MemoryManager,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + ReadVolatile + Write,
    {
        // Read table
        let table = MemoryRangeTable::read_from(socket, req.length())?;

        // And then read the memory itself
        memory_manager
            .receive_memory_regions(&table, socket)
            .map_err(|e| {
                Response::error().write_to(socket).ok();
                e
            })?;
        Response::ok().write_to(socket)?;
        Ok(())
    }

    fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
        url.strip_prefix("unix:")
            .ok_or_else(|| {
                MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
            })
            .map(|s| s.into())
    }

    // Returns true if there were dirty pages to send
    fn vm_maybe_send_dirty_pages<T>(
        vm: &mut Vm,
        socket: &mut T,
    ) -> result::Result<bool, MigratableError>
    where
        T: Read + Write + WriteVolatile,
    {
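        // Wire protocol: a memory request sized to the table, then the table
        // itself, then the raw bytes of every listed range; the peer replies
        // with a single Response acknowledging (or rejecting) the batch.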
        // Send (dirty) memory table
        let table = vm.dirty_log()?;

        // But if there are no regions go straight to pause
        if table.regions().is_empty() {
            return Ok(false);
        }

        Request::memory(table.length()).write_to(socket).unwrap();
        table.write_to(socket)?;
        // And then the memory itself
        vm.send_memory_regions(&table, socket)?;
        let res = Response::read_from(socket)?;
        if res.status() != Status::Ok {
            warn!("Error during dirty memory migration");
            Request::abandon().write_to(socket)?;
            Response::read_from(socket).ok();
            return Err(MigratableError::MigrateSend(anyhow!(
                "Error during dirty memory migration"
            )));
        }

        Ok(true)
    }

    fn send_migration(
        vm: &mut Vm,
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
            dyn hypervisor::Hypervisor,
        >,
        send_data_migration: VmSendMigrationData,
    ) -> result::Result<(), MigratableError> {
        let path = Self::socket_url_to_path(&send_data_migration.destination_url)?;
        let mut socket = UnixStream::connect(path).map_err(|e| {
            MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
        })?;

        // Start the migration
        Request::start().write_to(&mut socket)?;
        let res = Response::read_from(&mut socket)?;
        if res.status() != Status::Ok {
            warn!("Error starting migration");
            Request::abandon().write_to(&mut socket)?;
            Response::read_from(&mut socket).ok();
            return Err(MigratableError::MigrateSend(anyhow!(
                "Error starting migration"
            )));
        }

        // Send config
        let vm_config = vm.get_config();
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let common_cpuid = {
            #[cfg(feature = "tdx")]
            if vm_config.lock().unwrap().is_tdx_enabled() {
                return Err(MigratableError::MigrateSend(anyhow!(
                    "Live Migration is not supported when TDX is enabled"
                )));
            };

            let amx = vm_config.lock().unwrap().cpus.features.amx;
            let phys_bits =
                vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
            arch::generate_common_cpuid(
                &hypervisor,
                &arch::CpuidConfig {
                    sgx_epc_sections: None,
                    phys_bits,
                    kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
                    #[cfg(feature = "tdx")]
                    tdx: false,
                    amx,
                },
            )
            .map_err(|e| {
                MigratableError::MigrateSend(anyhow!("Error generating common cpuid: {:?}", e))
            })?
        };

        if send_data_migration.local {
            vm.send_memory_fds(&mut socket)?;
        }

        let vm_migration_config = VmMigrationConfig {
            vm_config,
            #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
            common_cpuid,
            memory_manager_data: vm.memory_manager_data(),
        };
        let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
        Request::config(config_data.len() as u64).write_to(&mut socket)?;
        socket
            .write_all(&config_data)
            .map_err(MigratableError::MigrateSocket)?;
        let res = Response::read_from(&mut socket)?;
        if res.status() != Status::Ok {
            warn!("Error during config migration");
            Request::abandon().write_to(&mut socket)?;
            Response::read_from(&mut socket).ok();
            return Err(MigratableError::MigrateSend(anyhow!(
                "Error during config migration"
            )));
        }

        // Let every Migratable object know about the migration being started.
        vm.start_migration()?;
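
        // Local migrations already handed the memory over as file descriptors
        // above, so only a pause is needed. Remote migrations stream the full
        // memory first, then iterate over dirty pages to shrink the final
        // pause window.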
        if send_data_migration.local {
            // Now pause VM
            vm.pause()?;
        } else {
            // Start logging dirty pages
            vm.start_dirty_log()?;

            // Send memory table
            let table = vm.memory_range_table()?;
            Request::memory(table.length())
                .write_to(&mut socket)
                .unwrap();
            table.write_to(&mut socket)?;
            // And then the memory itself
            vm.send_memory_regions(&table, &mut socket)?;
            let res = Response::read_from(&mut socket)?;
            if res.status() != Status::Ok {
                warn!("Error during memory migration");
                Request::abandon().write_to(&mut socket)?;
                Response::read_from(&mut socket).ok();
                return Err(MigratableError::MigrateSend(anyhow!(
                    "Error during memory migration"
                )));
            }

            // Try at most 5 passes of dirty memory sending
            const MAX_DIRTY_MIGRATIONS: usize = 5;
            for i in 0..MAX_DIRTY_MIGRATIONS {
                info!(
                    "Dirty memory migration {} of {}",
                    i + 1,
                    MAX_DIRTY_MIGRATIONS
                );
                if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
                    break;
                }
            }

            // Now pause VM
            vm.pause()?;

            // Send last batch of dirty pages
            Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;

            // Stop logging dirty pages
            vm.stop_dirty_log()?;
        }
        // Capture snapshot and send it
        let vm_snapshot = vm.snapshot()?;
        let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
        Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
        socket
            .write_all(&snapshot_data)
            .map_err(MigratableError::MigrateSocket)?;
        let res = Response::read_from(&mut socket)?;
        if res.status() != Status::Ok {
            warn!("Error during state migration");
            Request::abandon().write_to(&mut socket)?;
            Response::read_from(&mut socket).ok();
            return Err(MigratableError::MigrateSend(anyhow!(
                "Error during state migration"
            )));
        }

        // Complete the migration
        Request::complete().write_to(&mut socket)?;
        let res = Response::read_from(&mut socket)?;
        if res.status() != Status::Ok {
            warn!("Error completing migration");
            Request::abandon().write_to(&mut socket)?;
            Response::read_from(&mut socket).ok();
            return Err(MigratableError::MigrateSend(anyhow!(
                "Error completing migration"
            )));
        }
        info!("Migration complete");

        // Let every Migratable object know about the migration being complete
        vm.complete_migration()
    }

    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    fn vm_check_cpuid_compatibility(
        &self,
        src_vm_config: &Arc<Mutex<VmConfig>>,
        src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
    ) -> result::Result<(), MigratableError> {
        #[cfg(feature = "tdx")]
        if src_vm_config.lock().unwrap().is_tdx_enabled() {
            return Err(MigratableError::MigrateReceive(anyhow!(
                "Live Migration is not supported when TDX is enabled"
            )));
        };

        // We check the `CPUID` compatibility between the source and
        // destination VMs, which is mostly about feature compatibility;
        // "topology/sgx" leaves are not relevant here.
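        // The expected CPUID is regenerated locally from the source VM's
        // config, then compared entry by entry against the source's list.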
        let dest_cpuid = &{
            let vm_config = &src_vm_config.lock().unwrap();

            let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
            arch::generate_common_cpuid(
                &self.hypervisor.clone(),
                &arch::CpuidConfig {
                    sgx_epc_sections: None,
                    phys_bits,
                    kvm_hyperv: vm_config.cpus.kvm_hyperv,
                    #[cfg(feature = "tdx")]
                    tdx: false,
                    amx: vm_config.cpus.features.amx,
                },
            )
            .map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
            })?
        };
        arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error checking cpu feature compatibility: {:?}",
                e
            ))
        })
    }

    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        if let Some(ref vm) = self.vm {
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the GDB receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
}

impl RequestHandler for Vmm {
    fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> {
        // We only store the passed VM config.
        // The VM will be created when we are asked to boot it.
        if self.vm_config.is_none() {
            self.vm_config = Some(config);
            Ok(())
        } else {
            Err(VmError::VmAlreadyCreated)
        }
    }

    fn vm_boot(&mut self) -> result::Result<(), VmError> {
        tracer::start();
        info!("Booting VM");
        event!("vm", "booting");
        let r = {
            trace_scoped!("vm_boot");
            // If we don't have a config, we cannot boot a VM.
            if self.vm_config.is_none() {
                return Err(VmError::VmMissingConfig);
            };

            // Create a new VM if we don't have one yet.
            if self.vm.is_none() {
                let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
                let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
                #[cfg(feature = "guest_debug")]
                let vm_debug_evt = self
                    .vm_debug_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;
                let activate_evt = self
                    .activate_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;

                if let Some(ref vm_config) = self.vm_config {
                    let vm = Vm::new(
                        Arc::clone(vm_config),
                        exit_evt,
                        reset_evt,
                        #[cfg(feature = "guest_debug")]
                        vm_debug_evt,
                        &self.seccomp_action,
                        self.hypervisor.clone(),
                        activate_evt,
                        None,
                        None,
                        None,
                        None,
                        Arc::clone(&self.original_termios_opt),
                        None,
                        None,
                        None,
                    )?;

                    self.vm = Some(vm);
                }
            }

            // Now we can boot the VM.
            if let Some(ref mut vm) = self.vm {
                vm.boot()
            } else {
                Err(VmError::VmNotCreated)
            }
        };
        tracer::end();
        if r.is_ok() {
            event!("vm", "booted");
        }
        r
    }

    fn vm_pause(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.pause().map_err(VmError::Pause)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_resume(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.resume().map_err(VmError::Resume)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.snapshot()
                .map_err(VmError::Snapshot)
                .and_then(|snapshot| {
                    vm.send(&snapshot, destination_url)
                        .map_err(VmError::SnapshotSend)
                })
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
        if self.vm.is_some() || self.vm_config.is_some() {
            return Err(VmError::VmAlreadyCreated);
        }

        let source_url = restore_cfg.source_url.as_path().to_str();
        if source_url.is_none() {
            return Err(VmError::InvalidRestoreSourceUrl);
        }
        // Safe to unwrap as we checked it was Some(&str).
        let source_url = source_url.unwrap();

        let vm_config = Arc::new(Mutex::new(
            recv_vm_config(source_url).map_err(VmError::Restore)?,
        ));
        restore_cfg
            .validate(&vm_config.lock().unwrap().clone())
            .map_err(VmError::ConfigValidation)?;

        // Update VM's net configurations with new fds received for restore operation
        if let (Some(restored_nets), Some(vm_net_configs)) =
            (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
        {
            for net in restored_nets.iter() {
                for net_config in vm_net_configs.iter_mut() {
                    // update only if the net dev is backed by FDs
                    if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
                        net_config.fds.clone_from(&net.fds);
                    }
                }
            }
        }

        let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;

        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
            .map_err(VmError::Restore)?;

        self.vm_config = Some(Arc::clone(&vm_config));

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        let vm = Vm::new(
            vm_config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            None,
            None,
            None,
            None,
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
            Some(source_url),
            Some(restore_cfg.prefault),
        )?;
        self.vm = Some(vm);

        // Now we can restore the rest of the VM.
        if let Some(ref mut vm) = self.vm {
            vm.restore()
        } else {
            Err(VmError::VmNotCreated)
        }
    }

    #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
    fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.coredump(destination_url).map_err(VmError::Coredump)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
        let r = if let Some(ref mut vm) = self.vm.take() {
            vm.shutdown()
        } else {
            Err(VmError::VmNotRunning)
        };

        if r.is_ok() {
            event!("vm", "shutdown");
        }

        r
    }

    fn vm_reboot(&mut self) -> result::Result<(), VmError> {
        event!("vm", "rebooting");

        // First we stop the current VM
        let (config, serial_pty, console_pty, debug_console_pty, console_resize_pipe) =
            if let Some(mut vm) = self.vm.take() {
                let config = vm.get_config();
                let serial_pty = vm.serial_pty();
                let console_pty = vm.console_pty();
                let debug_console_pty = vm.debug_console_pty();
                let console_resize_pipe = vm
                    .console_resize_pipe()
                    .as_ref()
                    .map(|pipe| pipe.try_clone().unwrap());
                vm.shutdown()?;
                (
                    config,
                    serial_pty,
                    console_pty,
                    debug_console_pty,
                    console_resize_pipe,
                )
            } else {
                return Err(VmError::VmNotCreated);
            };

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
        // an event sitting in the shared reset_evt. Without doing this we get very early reboots
        // during the boot process.
        if self.reset_evt.read().is_ok() {
            warn!("Spurious second reset event received. Ignoring.");
        }

        // Then we create the new VM
        let mut vm = Vm::new(
            config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            serial_pty,
            console_pty,
            debug_console_pty,
            console_resize_pipe,
            Arc::clone(&self.original_termios_opt),
            None,
            None,
            None,
        )?;

        // And we boot it
        vm.boot()?;

        self.vm = Some(vm);

        event!("vm", "rebooted");

        Ok(())
    }

    fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
        match &self.vm_config {
            Some(config) => {
                let state = match &self.vm {
                    Some(vm) => vm.get_state()?,
                    None => VmState::Created,
                };

                let config = Arc::clone(config);

                let mut memory_actual_size = config.lock().unwrap().memory.total_size();
                if let Some(vm) = &self.vm {
                    memory_actual_size -= vm.balloon_size();
                }

                let device_tree = self.vm.as_ref().map(|vm| vm.device_tree());

                Ok(VmInfoResponse {
                    config,
                    state,
                    memory_actual_size,
                    device_tree,
                })
            }
            None => Err(VmError::VmNotCreated),
        }
    }

    fn vmm_ping(&self) -> VmmPingResponse {
        let VmmVersionInfo {
            build_version,
            version,
        } = self.version.clone();

        VmmPingResponse {
            build_version,
            version,
            pid: std::process::id() as i64,
            features: feature_list(),
        }
    }

    fn vm_delete(&mut self) -> result::Result<(), VmError> {
        if self.vm_config.is_none() {
            return Ok(());
        }

        // If a VM is booted, we first try to shut it down.
        if self.vm.is_some() {
            self.vm_shutdown()?;
        }

        self.vm_config = None;

        event!("vm", "deleted");

        Ok(())
    }

    fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
        self.vm_delete()?;
        event!("vmm", "shutdown");
        Ok(())
    }

    fn vm_resize(
        &mut self,
        desired_vcpus: Option<u8>,
        desired_ram: Option<u64>,
        desired_balloon: Option<u64>,
    ) -> result::Result<(), VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        if let Some(ref mut vm) = self.vm {
            if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
                error!("Error when resizing VM: {:?}", e);
                Err(e)
            } else {
                Ok(())
            }
        } else {
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            if let Some(desired_vcpus) = desired_vcpus {
                config.cpus.boot_vcpus = desired_vcpus;
            }
            if let Some(desired_ram) = desired_ram {
                config.memory.size = desired_ram;
            }
            if let Some(desired_balloon) = desired_balloon {
                if let Some(balloon_config) = &mut config.balloon {
                    balloon_config.size = desired_balloon;
                }
            }
            Ok(())
        }
    }

    fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        if let Some(ref mut vm) = self.vm {
            if let Err(e) = vm.resize_zone(id, desired_ram) {
                error!("Error when resizing VM: {:?}", e);
                Err(e)
            } else {
                Ok(())
            }
        } else {
            // Update VmConfig by setting the new desired ram.
            let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;

            if let Some(zones) = &mut memory_config.zones {
                for zone in zones.iter_mut() {
                    if zone.id == id {
                        zone.size = desired_ram;
                        return Ok(());
                    }
                }
            }

            error!("Could not find the memory zone {} for the resize", id);
            Err(VmError::ResizeZone)
        }
    }

    fn vm_add_device(
        &mut self,
        device_cfg: DeviceConfig,
    ) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.devices, device_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_device(device_cfg).map_err(|e| {
                error!("Error when adding new device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.devices, device_cfg);
            Ok(None)
        }
    }

    fn vm_add_user_device(
        &mut self,
        device_cfg: UserDeviceConfig,
    ) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.user_devices, device_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_user_device(device_cfg).map_err(|e| {
                error!("Error when adding new user device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.user_devices, device_cfg);
            Ok(None)
        }
    }

    fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            if let Err(e) = vm.remove_device(id) {
                error!("Error when removing device from the VM: {:?}", e);
                Err(e)
            } else {
                Ok(())
            }
        } else if let Some(ref config) = self.vm_config {
            let mut config = config.lock().unwrap();
            if config.remove_device(&id) {
                Ok(())
            } else {
                Err(VmError::NoDeviceToRemove(id))
            }
        } else {
            Err(VmError::VmNotCreated)
        }
    }

    fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.disks, disk_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_disk(disk_cfg).map_err(|e| {
                error!("Error when adding new disk to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.disks, disk_cfg);
            Ok(None)
        }
    }

    fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.fs, fs_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_fs(fs_cfg).map_err(|e| {
                error!("Error when adding new fs to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.fs, fs_cfg);
            Ok(None)
        }
    }

    fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.pmem, pmem_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_pmem(pmem_cfg).map_err(|e| {
                error!("Error when adding new pmem device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.pmem, pmem_cfg);
            Ok(None)
        }
    }

    fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.net, net_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_net(net_cfg).map_err(|e| {
                error!("Error when adding new network device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.net, net_cfg);
            Ok(None)
        }
    }

    fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.vdpa, vdpa_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
                error!("Error when adding new vDPA device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.vdpa, vdpa_cfg);
            Ok(None)
        }
    }

    fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();

            if config.vsock.is_some() {
                return Err(VmError::TooManyVsockDevices);
            }

            config.vsock = Some(vsock_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_vsock(vsock_cfg).map_err(|e| {
                error!("Error when adding new vsock device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            config.vsock = Some(vsock_cfg);
            Ok(None)
        }
    }

    fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
        if let Some(ref mut vm) = self.vm {
            let info = vm.counters().map_err(|e| {
                error!("Error when getting counters from the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_power_button(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.power_button()
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_nmi(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.nmi()
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_receive_migration(
        &mut self,
        receive_data_migration: VmReceiveMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Receiving migration: receiver_url = {}",
            receive_data_migration.receiver_url
        );

        let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?;
        let listener = UnixListener::bind(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
        })?;
        let (mut socket, _addr) = listener.accept().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e))
        })?;
        std::fs::remove_file(&path).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e))
        })?;

        let mut started = false;
        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
        let mut existing_memory_files = None;
        loop {
            let req = Request::read_from(&mut socket)?;
            match req.command() {
                Command::Invalid => info!("Invalid Command Received"),
                Command::Start => {
                    info!("Start Command Received");
                    started = true;

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Config => {
                    info!("Config Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    memory_manager = Some(self.vm_receive_config(
                        &req,
                        &mut socket,
                        existing_memory_files.take(),
                    )?);
                }
                Command::State => {
                    info!("State Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    if let Some(mm) = memory_manager.take() {
                        self.vm_receive_state(&req, &mut socket, mm)?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::Memory => {
                    info!("Memory Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    if let Some(mm) = memory_manager.as_ref() {
                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::MemoryFd => {
                    info!("MemoryFd Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }

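                    // The sender passes a region's backing file descriptor
                    // over the socket, paired with a 4-byte little-endian
                    // memory slot id.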
                    let mut buf = [0u8; 4];
                    let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| {
                        MigratableError::MigrateReceive(anyhow!(
                            "Error receiving slot from socket: {}",
                            e
                        ))
                    })?;

                    if existing_memory_files.is_none() {
                        existing_memory_files = Some(HashMap::default())
                    }

                    if let Some(ref mut existing_memory_files) = existing_memory_files {
                        let slot = u32::from_le_bytes(buf);
                        existing_memory_files.insert(slot, file.unwrap());
                    }

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Complete => {
                    info!("Complete Command Received");
                    if let Some(ref mut vm) = self.vm.as_mut() {
                        vm.resume()?;
                        Response::ok().write_to(&mut socket)?;
                    } else {
                        warn!("VM not created yet");
                        Response::error().write_to(&mut socket)?;
                    }
                    break;
                }
                Command::Abandon => {
                    info!("Abandon Command Received");
                    self.vm = None;
                    self.vm_config = None;
                    Response::ok().write_to(&mut socket).ok();
                    break;
                }
            }
        }

        Ok(())
    }

    fn vm_send_migration(
        &mut self,
        send_data_migration: VmSendMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Sending migration: destination_url = {}, local = {}",
            send_data_migration.destination_url, send_data_migration.local
        );

        if !self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .backed_by_shared_memory()
            && send_data_migration.local
        {
            return Err(MigratableError::MigrateSend(anyhow!(
                "Local migration requires shared memory or hugepages enabled"
            )));
        }

        if let Some(vm) = self.vm.as_mut() {
            Self::send_migration(
                vm,
                #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
                self.hypervisor.clone(),
                send_data_migration,
            )
            .map_err(|migration_err| {
                error!("Migration failed: {:?}", migration_err);

                // Stop logging dirty pages
                if let Err(e) = vm.stop_dirty_log() {
                    return e;
                }

                if vm.get_state().unwrap() == VmState::Paused {
                    if let Err(e) = vm.resume() {
                        return e;
                    }
                }

                migration_err
            })?;

            // Shutdown the VM after the migration succeeded
            self.exit_evt.write(1).map_err(|e| {
                MigratableError::MigrateSend(anyhow!(
                    "Failed shutting down the VM after migration: {:?}",
                    e
                ))
            })
        } else {
            Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
        }
    }
}

const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager";
const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager";
const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager";

#[cfg(test)]
mod unit_tests {
    use super::*;
    #[cfg(target_arch = "x86_64")]
    use crate::config::DebugConsoleConfig;
    use config::{
        ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig,
        RngConfig,
    };

    fn create_dummy_vmm() -> Vmm {
        Vmm::new(
            VmmVersionInfo::new("dummy", "dummy"),
            EventFd::new(EFD_NONBLOCK).unwrap(),
            #[cfg(feature = "guest_debug")]
            EventFd::new(EFD_NONBLOCK).unwrap(),
            #[cfg(feature = "guest_debug")]
            EventFd::new(EFD_NONBLOCK).unwrap(),
            SeccompAction::Allow,
            hypervisor::new().unwrap(),
            EventFd::new(EFD_NONBLOCK).unwrap(),
        )
        .unwrap()
    }
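
    /// Builds a minimal `VmConfig` (one vCPU, 512 MiB of shared memory and a
    /// placeholder kernel path) used as the baseline for the cold-add tests
    /// below.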
    fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> {
        Arc::new(Mutex::new(VmConfig {
            cpus: CpusConfig {
                boot_vcpus: 1,
                max_vcpus: 1,
                topology: None,
                kvm_hyperv: false,
                max_phys_bits: 46,
                affinity: None,
                features: config::CpuFeatures::default(),
            },
            memory: MemoryConfig {
                size: 536_870_912,
                mergeable: false,
                hotplug_method: HotplugMethod::Acpi,
                hotplug_size: None,
                hotplugged_size: None,
                shared: true,
                hugepages: false,
                hugepage_size: None,
                prefault: false,
                zones: None,
                thp: true,
            },
            payload: Some(PayloadConfig {
                kernel: Some(PathBuf::from("/path/to/kernel")),
                firmware: None,
                cmdline: None,
                initramfs: None,
                #[cfg(feature = "igvm")]
                igvm: None,
                #[cfg(feature = "sev_snp")]
                host_data: None,
            }),
            rate_limit_groups: None,
            disks: None,
            net: None,
            rng: RngConfig {
                src: PathBuf::from("/dev/urandom"),
                iommu: false,
            },
            balloon: None,
            fs: None,
            pmem: None,
            serial: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Null,
                iommu: false,
                socket: None,
            },
            console: ConsoleConfig {
                file: None,
                mode: ConsoleOutputMode::Tty,
                iommu: false,
                socket: None,
            },
            #[cfg(target_arch = "x86_64")]
            debug_console: DebugConsoleConfig::default(),
            devices: None,
            user_devices: None,
            vdpa: None,
            vsock: None,
            pvpanic: false,
            iommu: false,
            #[cfg(target_arch = "x86_64")]
            sgx_epc: None,
            numa: None,
            watchdog: false,
            #[cfg(feature = "guest_debug")]
            gdb: false,
            pci_segments: None,
            platform: None,
            tpm: None,
            preserved_fds: None,
        }))
    }

    #[test]
    fn test_vmm_vm_create() {
        let mut vmm = create_dummy_vmm();
        let config = create_dummy_vm_config();

        assert!(matches!(vmm.vm_create(config.clone()), Ok(())));
        assert!(matches!(
            vmm.vm_create(config),
            Err(VmError::VmAlreadyCreated)
        ));
    }

    #[test]
    fn test_vmm_vm_cold_add_device() {
        let mut vmm = create_dummy_vmm();
        let device_config = DeviceConfig::parse("path=/path/to/device").unwrap();

        assert!(matches!(
            vmm.vm_add_device(device_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .devices
            .is_none());

        let result = vmm.vm_add_device(device_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .devices
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .devices
                .clone()
                .unwrap()[0],
            device_config
        );
    }
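
    // The remaining cold-add tests follow the same three-step pattern as
    // test_vmm_vm_cold_add_device: adding to a not-yet-created VM fails with
    // VmNotCreated, the relevant config field starts out as None, and a
    // successful add stores exactly the parsed configuration in vm_config.
    // The only structural difference is vsock, which is a single Option in
    // VmConfig rather than a list, so the whole value is compared.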
    #[test]
    fn test_vmm_vm_cold_add_user_device() {
        let mut vmm = create_dummy_vmm();
        let user_device_config =
            UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();

        assert!(matches!(
            vmm.vm_add_user_device(user_device_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .user_devices
            .is_none());

        let result = vmm.vm_add_user_device(user_device_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .user_devices
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .user_devices
                .clone()
                .unwrap()[0],
            user_device_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_disk() {
        let mut vmm = create_dummy_vmm();
        let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();

        assert!(matches!(
            vmm.vm_add_disk(disk_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .disks
            .is_none());

        let result = vmm.vm_add_disk(disk_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .disks
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .disks
                .clone()
                .unwrap()[0],
            disk_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_fs() {
        let mut vmm = create_dummy_vmm();
        let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();

        assert!(matches!(
            vmm.vm_add_fs(fs_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());

        let result = vmm.vm_add_fs(fs_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .fs
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .fs
                .clone()
                .unwrap()[0],
            fs_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_pmem() {
        let mut vmm = create_dummy_vmm();
        let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();

        assert!(matches!(
            vmm.vm_add_pmem(pmem_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .pmem
            .is_none());

        let result = vmm.vm_add_pmem(pmem_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .pmem
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .pmem
                .clone()
                .unwrap()[0],
            pmem_config
        );
    }
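
    // The net variant parses a vhost-user backend (a socket plus explicit
    // guest and host MAC addresses) rather than a TAP interface.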
    #[test]
    fn test_vmm_vm_cold_add_net() {
        let mut vmm = create_dummy_vmm();
        let net_config = NetConfig::parse(
            "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
        )
        .unwrap();

        assert!(matches!(
            vmm.vm_add_net(net_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .net
            .is_none());

        let result = vmm.vm_add_net(net_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .net
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .net
                .clone()
                .unwrap()[0],
            net_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_vdpa() {
        let mut vmm = create_dummy_vmm();
        let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();

        assert!(matches!(
            vmm.vm_add_vdpa(vdpa_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .vdpa
            .is_none());

        let result = vmm.vm_add_vdpa(vdpa_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .vdpa
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .vdpa
                .clone()
                .unwrap()[0],
            vdpa_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_vsock() {
        let mut vmm = create_dummy_vmm();
        let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();

        assert!(matches!(
            vmm.vm_add_vsock(vsock_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .vsock
            .is_none());

        let result = vmm.vm_add_vsock(vsock_config.clone());
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .vsock
                .clone()
                .unwrap(),
            vsock_config
        );
    }
}