1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use crate::api::{ 12 ApiError, ApiRequest, ApiResponse, ApiResponsePayload, VmInfo, VmReceiveMigrationData, 13 VmSendMigrationData, VmmPingResponse, 14 }; 15 use crate::config::{ 16 add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, 17 UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, 18 }; 19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 20 use crate::coredump::GuestDebuggable; 21 use crate::memory_manager::MemoryManager; 22 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 23 use crate::migration::get_vm_snapshot; 24 use crate::migration::{recv_vm_config, recv_vm_state}; 25 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 26 use crate::vm::{Error as VmError, Vm, VmState}; 27 use anyhow::anyhow; 28 #[cfg(feature = "dbus_api")] 29 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 31 use memory_manager::MemoryManagerSnapshotData; 32 use pci::PciBdf; 33 use seccompiler::{apply_filter, SeccompAction}; 34 use serde::ser::{SerializeStruct, Serializer}; 35 use serde::{Deserialize, Serialize}; 36 use signal_hook::iterator::{Handle, Signals}; 37 use std::collections::HashMap; 38 use std::fs::File; 39 use std::io; 40 use std::io::{stdout, Read, Write}; 41 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 42 use std::os::unix::net::UnixListener; 43 use std::os::unix::net::UnixStream; 44 use std::panic::AssertUnwindSafe; 45 use std::path::PathBuf; 46 use std::rc::Rc; 47 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 48 use std::sync::{Arc, Mutex}; 49 use std::time::Instant; 50 use std::{result, thread}; 51 use thiserror::Error; 52 use tracer::trace_scoped; 53 use vm_memory::bitmap::AtomicBitmap; 54 use vm_memory::{ReadVolatile, WriteVolatile}; 55 use vm_migration::{protocol::*, Migratable}; 56 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 57 use vmm_sys_util::eventfd::EventFd; 58 use vmm_sys_util::signal::unblock_signal; 59 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 60 61 mod acpi; 62 pub mod api; 63 mod clone3; 64 pub mod config; 65 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 66 mod coredump; 67 pub mod cpu; 68 pub mod device_manager; 69 pub mod device_tree; 70 #[cfg(feature = "guest_debug")] 71 mod gdb; 72 pub mod interrupt; 73 pub mod memory_manager; 74 pub mod migration; 75 mod pci_segment; 76 pub mod seccomp_filters; 77 mod serial_manager; 78 mod sigwinch_listener; 79 pub mod vm; 80 pub mod vm_config; 81 82 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 83 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 84 85 /// Errors associated with VMM management 86 #[derive(Debug, Error)] 87 pub enum Error { 88 /// API request receive error 89 #[error("Error receiving API request: {0}")] 90 ApiRequestRecv(#[source] RecvError), 91 92 /// API response send error 93 #[error("Error sending API request: {0}")] 94 ApiResponseSend(#[source] SendError<ApiResponse>), 95 96 /// Cannot bind to the UNIX domain socket path 97 #[error("Error binding to UNIX domain socket: {0}")] 98 Bind(#[source] io::Error), 99 100 /// Cannot clone EventFd. 101 #[error("Error cloning EventFd: {0}")] 102 EventFdClone(#[source] io::Error), 103 104 /// Cannot create EventFd. 105 #[error("Error creating EventFd: {0}")] 106 EventFdCreate(#[source] io::Error), 107 108 /// Cannot read from EventFd. 109 #[error("Error reading from EventFd: {0}")] 110 EventFdRead(#[source] io::Error), 111 112 /// Cannot create epoll context. 113 #[error("Error creating epoll context: {0}")] 114 Epoll(#[source] io::Error), 115 116 /// Cannot create HTTP thread 117 #[error("Error spawning HTTP thread: {0}")] 118 HttpThreadSpawn(#[source] io::Error), 119 120 /// Cannot create D-Bus thread 121 #[cfg(feature = "dbus_api")] 122 #[error("Error spawning D-Bus thread: {0}")] 123 DBusThreadSpawn(#[source] io::Error), 124 125 /// Cannot start D-Bus session 126 #[cfg(feature = "dbus_api")] 127 #[error("Error starting D-Bus session: {0}")] 128 CreateDBusSession(#[source] zbus::Error), 129 130 /// Cannot create `event-monitor` thread 131 #[error("Error spawning `event-monitor` thread: {0}")] 132 EventMonitorThreadSpawn(#[source] io::Error), 133 134 /// Cannot handle the VM STDIN stream 135 #[error("Error handling VM stdin: {0:?}")] 136 Stdin(VmError), 137 138 /// Cannot handle the VM pty stream 139 #[error("Error handling VM pty: {0:?}")] 140 Pty(VmError), 141 142 /// Cannot reboot the VM 143 #[error("Error rebooting VM: {0:?}")] 144 VmReboot(VmError), 145 146 /// Cannot create VMM thread 147 #[error("Error spawning VMM thread {0:?}")] 148 VmmThreadSpawn(#[source] io::Error), 149 150 /// Cannot shut the VMM down 151 #[error("Error shutting down VMM: {0:?}")] 152 VmmShutdown(VmError), 153 154 /// Cannot create seccomp filter 155 #[error("Error creating seccomp filter: {0}")] 156 CreateSeccompFilter(seccompiler::Error), 157 158 /// Cannot apply seccomp filter 159 #[error("Error applying seccomp filter: {0}")] 160 ApplySeccompFilter(seccompiler::Error), 161 162 /// Error activating virtio devices 163 #[error("Error activating virtio devices: {0:?}")] 164 ActivateVirtioDevices(VmError), 165 166 /// Error creating API server 167 #[error("Error creating API server {0:?}")] 168 CreateApiServer(micro_http::ServerError), 169 170 /// Error binding API server socket 171 #[error("Error creation API server's socket {0:?}")] 172 CreateApiServerSocket(#[source] io::Error), 173 174 #[cfg(feature = "guest_debug")] 175 #[error("Failed to start the GDB thread: {0}")] 176 GdbThreadSpawn(io::Error), 177 178 /// GDB request receive error 179 #[cfg(feature = "guest_debug")] 180 #[error("Error receiving GDB request: {0}")] 181 GdbRequestRecv(#[source] RecvError), 182 183 /// GDB response send error 184 #[cfg(feature = "guest_debug")] 185 #[error("Error sending GDB request: {0}")] 186 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 187 188 #[error("Cannot spawn a signal handler thread: {0}")] 189 SignalHandlerSpawn(#[source] io::Error), 190 191 #[error("Failed to join on threads: {0:?}")] 192 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 193 } 194 pub type Result<T> = result::Result<T, Error>; 195 196 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 197 #[repr(u64)] 198 pub enum EpollDispatch { 199 Exit = 0, 200 Reset = 1, 201 Api = 2, 202 ActivateVirtioDevices = 3, 203 Debug = 4, 204 Unknown, 205 } 206 207 impl From<u64> for EpollDispatch { 208 fn from(v: u64) -> Self { 209 use EpollDispatch::*; 210 match v { 211 0 => Exit, 212 1 => Reset, 213 2 => Api, 214 3 => ActivateVirtioDevices, 215 4 => Debug, 216 _ => Unknown, 217 } 218 } 219 } 220 221 pub struct EpollContext { 222 epoll_file: File, 223 } 224 225 impl EpollContext { 226 pub fn new() -> result::Result<EpollContext, io::Error> { 227 let epoll_fd = epoll::create(true)?; 228 // Use 'File' to enforce closing on 'epoll_fd' 229 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 230 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 231 232 Ok(EpollContext { epoll_file }) 233 } 234 235 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 236 where 237 T: AsRawFd, 238 { 239 let dispatch_index = token as u64; 240 epoll::ctl( 241 self.epoll_file.as_raw_fd(), 242 epoll::ControlOptions::EPOLL_CTL_ADD, 243 fd.as_raw_fd(), 244 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 245 )?; 246 247 Ok(()) 248 } 249 250 #[cfg(fuzzing)] 251 pub fn add_event_custom<T>( 252 &mut self, 253 fd: &T, 254 id: u64, 255 evts: epoll::Events, 256 ) -> result::Result<(), io::Error> 257 where 258 T: AsRawFd, 259 { 260 epoll::ctl( 261 self.epoll_file.as_raw_fd(), 262 epoll::ControlOptions::EPOLL_CTL_ADD, 263 fd.as_raw_fd(), 264 epoll::Event::new(evts, id), 265 )?; 266 267 Ok(()) 268 } 269 } 270 271 impl AsRawFd for EpollContext { 272 fn as_raw_fd(&self) -> RawFd { 273 self.epoll_file.as_raw_fd() 274 } 275 } 276 277 pub struct PciDeviceInfo { 278 pub id: String, 279 pub bdf: PciBdf, 280 } 281 282 impl Serialize for PciDeviceInfo { 283 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 284 where 285 S: Serializer, 286 { 287 let bdf_str = self.bdf.to_string(); 288 289 // Serialize the structure. 290 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 291 state.serialize_field("id", &self.id)?; 292 state.serialize_field("bdf", &bdf_str)?; 293 state.end() 294 } 295 } 296 297 pub fn feature_list() -> Vec<String> { 298 vec![ 299 #[cfg(feature = "dbus_api")] 300 "dbus_api".to_string(), 301 #[cfg(feature = "dhat-heap")] 302 "dhat-heap".to_string(), 303 #[cfg(feature = "guest_debug")] 304 "guest_debug".to_string(), 305 #[cfg(feature = "io_uring")] 306 "io_uring".to_string(), 307 #[cfg(feature = "kvm")] 308 "kvm".to_string(), 309 #[cfg(feature = "mshv")] 310 "mshv".to_string(), 311 #[cfg(feature = "sev_snp")] 312 "sev_snp".to_string(), 313 #[cfg(feature = "tdx")] 314 "tdx".to_string(), 315 #[cfg(feature = "tracing")] 316 "tracing".to_string(), 317 ] 318 } 319 320 pub fn start_event_monitor_thread( 321 mut monitor: event_monitor::Monitor, 322 seccomp_action: &SeccompAction, 323 hypervisor_type: hypervisor::HypervisorType, 324 exit_event: EventFd, 325 ) -> Result<thread::JoinHandle<Result<()>>> { 326 // Retrieve seccomp filter 327 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 328 .map_err(Error::CreateSeccompFilter)?; 329 330 thread::Builder::new() 331 .name("event-monitor".to_owned()) 332 .spawn(move || { 333 // Apply seccomp filter 334 if !seccomp_filter.is_empty() { 335 apply_filter(&seccomp_filter) 336 .map_err(Error::ApplySeccompFilter) 337 .map_err(|e| { 338 error!("Error applying seccomp filter: {:?}", e); 339 exit_event.write(1).ok(); 340 e 341 })?; 342 } 343 344 std::panic::catch_unwind(AssertUnwindSafe(move || { 345 while let Ok(event) = monitor.rx.recv() { 346 let event = Arc::new(event); 347 348 if let Some(ref mut file) = monitor.file { 349 file.write_all(event.as_bytes().as_ref()).ok(); 350 file.write_all(b"\n\n").ok(); 351 } 352 353 for tx in monitor.broadcast.iter() { 354 tx.send(event.clone()).ok(); 355 } 356 } 357 })) 358 .map_err(|_| { 359 error!("`event-monitor` thread panicked"); 360 exit_event.write(1).ok(); 361 }) 362 .ok(); 363 364 Ok(()) 365 }) 366 .map_err(Error::EventMonitorThreadSpawn) 367 } 368 369 #[allow(unused_variables)] 370 #[allow(clippy::too_many_arguments)] 371 pub fn start_vmm_thread( 372 vmm_version: VmmVersionInfo, 373 http_path: &Option<String>, 374 http_fd: Option<RawFd>, 375 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 376 api_event: EventFd, 377 api_sender: Sender<ApiRequest>, 378 api_receiver: Receiver<ApiRequest>, 379 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 380 #[cfg(feature = "guest_debug")] debug_event: EventFd, 381 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 382 exit_event: EventFd, 383 seccomp_action: &SeccompAction, 384 hypervisor: Arc<dyn hypervisor::Hypervisor>, 385 ) -> Result<VmmThreadHandle> { 386 #[cfg(feature = "guest_debug")] 387 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 388 #[cfg(feature = "guest_debug")] 389 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 390 #[cfg(feature = "guest_debug")] 391 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 392 #[cfg(feature = "guest_debug")] 393 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 394 395 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 396 let hypervisor_type = hypervisor.hypervisor_type(); 397 398 // Retrieve seccomp filter 399 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 400 .map_err(Error::CreateSeccompFilter)?; 401 402 let vmm_seccomp_action = seccomp_action.clone(); 403 let thread = { 404 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 405 thread::Builder::new() 406 .name("vmm".to_string()) 407 .spawn(move || { 408 // Apply seccomp filter for VMM thread. 409 if !vmm_seccomp_filter.is_empty() { 410 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 411 } 412 413 let mut vmm = Vmm::new( 414 vmm_version, 415 api_event, 416 #[cfg(feature = "guest_debug")] 417 debug_event, 418 #[cfg(feature = "guest_debug")] 419 vm_debug_event, 420 vmm_seccomp_action, 421 hypervisor, 422 exit_event, 423 )?; 424 425 vmm.setup_signal_handler()?; 426 427 vmm.control_loop( 428 Rc::new(api_receiver), 429 #[cfg(feature = "guest_debug")] 430 Rc::new(gdb_receiver), 431 ) 432 }) 433 .map_err(Error::VmmThreadSpawn)? 434 }; 435 436 // The VMM thread is started, we can start the dbus thread 437 // and start serving HTTP requests 438 #[cfg(feature = "dbus_api")] 439 let dbus_shutdown_chs = match dbus_options { 440 Some(opts) => { 441 let (_, chs) = api::start_dbus_thread( 442 opts, 443 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 444 api_sender.clone(), 445 seccomp_action, 446 exit_event.try_clone().map_err(Error::EventFdClone)?, 447 hypervisor_type, 448 )?; 449 Some(chs) 450 } 451 None => None, 452 }; 453 454 if let Some(http_path) = http_path { 455 api::start_http_path_thread( 456 http_path, 457 api_event_clone, 458 api_sender, 459 seccomp_action, 460 exit_event, 461 hypervisor_type, 462 )?; 463 } else if let Some(http_fd) = http_fd { 464 api::start_http_fd_thread( 465 http_fd, 466 api_event_clone, 467 api_sender, 468 seccomp_action, 469 exit_event, 470 hypervisor_type, 471 )?; 472 } 473 474 #[cfg(feature = "guest_debug")] 475 if let Some(debug_path) = debug_path { 476 let target = gdb::GdbStub::new( 477 gdb_sender, 478 gdb_debug_event, 479 gdb_vm_debug_event, 480 gdb_hw_breakpoints, 481 ); 482 thread::Builder::new() 483 .name("gdb".to_owned()) 484 .spawn(move || gdb::gdb_thread(target, &debug_path)) 485 .map_err(Error::GdbThreadSpawn)?; 486 } 487 488 Ok(VmmThreadHandle { 489 thread_handle: thread, 490 #[cfg(feature = "dbus_api")] 491 dbus_shutdown_chs, 492 }) 493 } 494 495 #[derive(Clone, Deserialize, Serialize)] 496 struct VmMigrationConfig { 497 vm_config: Arc<Mutex<VmConfig>>, 498 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 499 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 500 memory_manager_data: MemoryManagerSnapshotData, 501 } 502 503 #[derive(Debug, Clone)] 504 pub struct VmmVersionInfo { 505 pub build_version: String, 506 pub version: String, 507 } 508 509 impl VmmVersionInfo { 510 pub fn new(build_version: &str, version: &str) -> Self { 511 Self { 512 build_version: build_version.to_owned(), 513 version: version.to_owned(), 514 } 515 } 516 } 517 518 pub struct VmmThreadHandle { 519 pub thread_handle: thread::JoinHandle<Result<()>>, 520 #[cfg(feature = "dbus_api")] 521 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 522 } 523 524 pub struct Vmm { 525 epoll: EpollContext, 526 exit_evt: EventFd, 527 reset_evt: EventFd, 528 api_evt: EventFd, 529 #[cfg(feature = "guest_debug")] 530 debug_evt: EventFd, 531 #[cfg(feature = "guest_debug")] 532 vm_debug_evt: EventFd, 533 version: VmmVersionInfo, 534 vm: Option<Vm>, 535 vm_config: Option<Arc<Mutex<VmConfig>>>, 536 seccomp_action: SeccompAction, 537 hypervisor: Arc<dyn hypervisor::Hypervisor>, 538 activate_evt: EventFd, 539 signals: Option<Handle>, 540 threads: Vec<thread::JoinHandle<()>>, 541 original_termios_opt: Arc<Mutex<Option<termios>>>, 542 } 543 544 impl Vmm { 545 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 546 547 fn signal_handler( 548 mut signals: Signals, 549 original_termios_opt: Arc<Mutex<Option<termios>>>, 550 exit_evt: &EventFd, 551 ) { 552 for sig in &Self::HANDLED_SIGNALS { 553 unblock_signal(*sig).unwrap(); 554 } 555 556 for signal in signals.forever() { 557 match signal { 558 SIGTERM | SIGINT => { 559 if exit_evt.write(1).is_err() { 560 // Resetting the terminal is usually done as the VMM exits 561 if let Ok(lock) = original_termios_opt.lock() { 562 if let Some(termios) = *lock { 563 // SAFETY: FFI call 564 let _ = unsafe { 565 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 566 }; 567 } 568 } else { 569 warn!("Failed to lock original termios"); 570 } 571 572 std::process::exit(1); 573 } 574 } 575 _ => (), 576 } 577 } 578 } 579 580 fn setup_signal_handler(&mut self) -> Result<()> { 581 let signals = Signals::new(Self::HANDLED_SIGNALS); 582 match signals { 583 Ok(signals) => { 584 self.signals = Some(signals.handle()); 585 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 586 let original_termios_opt = Arc::clone(&self.original_termios_opt); 587 588 let signal_handler_seccomp_filter = get_seccomp_filter( 589 &self.seccomp_action, 590 Thread::SignalHandler, 591 self.hypervisor.hypervisor_type(), 592 ) 593 .map_err(Error::CreateSeccompFilter)?; 594 self.threads.push( 595 thread::Builder::new() 596 .name("vmm_signal_handler".to_string()) 597 .spawn(move || { 598 if !signal_handler_seccomp_filter.is_empty() { 599 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 600 .map_err(Error::ApplySeccompFilter) 601 { 602 error!("Error applying seccomp filter: {:?}", e); 603 exit_evt.write(1).ok(); 604 return; 605 } 606 } 607 std::panic::catch_unwind(AssertUnwindSafe(|| { 608 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 609 })) 610 .map_err(|_| { 611 error!("vmm signal_handler thread panicked"); 612 exit_evt.write(1).ok() 613 }) 614 .ok(); 615 }) 616 .map_err(Error::SignalHandlerSpawn)?, 617 ); 618 } 619 Err(e) => error!("Signal not found {}", e), 620 } 621 Ok(()) 622 } 623 624 fn new( 625 vmm_version: VmmVersionInfo, 626 api_evt: EventFd, 627 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 628 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 629 seccomp_action: SeccompAction, 630 hypervisor: Arc<dyn hypervisor::Hypervisor>, 631 exit_evt: EventFd, 632 ) -> Result<Self> { 633 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 634 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 635 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 636 637 epoll 638 .add_event(&exit_evt, EpollDispatch::Exit) 639 .map_err(Error::Epoll)?; 640 641 epoll 642 .add_event(&reset_evt, EpollDispatch::Reset) 643 .map_err(Error::Epoll)?; 644 645 epoll 646 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 647 .map_err(Error::Epoll)?; 648 649 epoll 650 .add_event(&api_evt, EpollDispatch::Api) 651 .map_err(Error::Epoll)?; 652 653 #[cfg(feature = "guest_debug")] 654 epoll 655 .add_event(&debug_evt, EpollDispatch::Debug) 656 .map_err(Error::Epoll)?; 657 658 Ok(Vmm { 659 epoll, 660 exit_evt, 661 reset_evt, 662 api_evt, 663 #[cfg(feature = "guest_debug")] 664 debug_evt, 665 #[cfg(feature = "guest_debug")] 666 vm_debug_evt, 667 version: vmm_version, 668 vm: None, 669 vm_config: None, 670 seccomp_action, 671 hypervisor, 672 activate_evt, 673 signals: None, 674 threads: vec![], 675 original_termios_opt: Arc::new(Mutex::new(None)), 676 }) 677 } 678 679 fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> { 680 // We only store the passed VM config. 681 // The VM will be created when being asked to boot it. 682 if self.vm_config.is_none() { 683 self.vm_config = Some(config); 684 Ok(()) 685 } else { 686 Err(VmError::VmAlreadyCreated) 687 } 688 } 689 690 fn vm_boot(&mut self) -> result::Result<(), VmError> { 691 tracer::start(); 692 let r = { 693 trace_scoped!("vm_boot"); 694 // If we don't have a config, we can not boot a VM. 695 if self.vm_config.is_none() { 696 return Err(VmError::VmMissingConfig); 697 }; 698 699 // Create a new VM if we don't have one yet. 700 if self.vm.is_none() { 701 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 702 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 703 #[cfg(feature = "guest_debug")] 704 let vm_debug_evt = self 705 .vm_debug_evt 706 .try_clone() 707 .map_err(VmError::EventFdClone)?; 708 let activate_evt = self 709 .activate_evt 710 .try_clone() 711 .map_err(VmError::EventFdClone)?; 712 713 if let Some(ref vm_config) = self.vm_config { 714 let vm = Vm::new( 715 Arc::clone(vm_config), 716 exit_evt, 717 reset_evt, 718 #[cfg(feature = "guest_debug")] 719 vm_debug_evt, 720 &self.seccomp_action, 721 self.hypervisor.clone(), 722 activate_evt, 723 None, 724 None, 725 None, 726 Arc::clone(&self.original_termios_opt), 727 None, 728 None, 729 None, 730 )?; 731 732 self.vm = Some(vm); 733 } 734 } 735 736 // Now we can boot the VM. 737 if let Some(ref mut vm) = self.vm { 738 vm.boot() 739 } else { 740 Err(VmError::VmNotCreated) 741 } 742 }; 743 tracer::end(); 744 r 745 } 746 747 fn vm_pause(&mut self) -> result::Result<(), VmError> { 748 if let Some(ref mut vm) = self.vm { 749 vm.pause().map_err(VmError::Pause) 750 } else { 751 Err(VmError::VmNotRunning) 752 } 753 } 754 755 fn vm_resume(&mut self) -> result::Result<(), VmError> { 756 if let Some(ref mut vm) = self.vm { 757 vm.resume().map_err(VmError::Resume) 758 } else { 759 Err(VmError::VmNotRunning) 760 } 761 } 762 763 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 764 if let Some(ref mut vm) = self.vm { 765 vm.snapshot() 766 .map_err(VmError::Snapshot) 767 .and_then(|snapshot| { 768 vm.send(&snapshot, destination_url) 769 .map_err(VmError::SnapshotSend) 770 }) 771 } else { 772 Err(VmError::VmNotRunning) 773 } 774 } 775 776 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 777 if self.vm.is_some() || self.vm_config.is_some() { 778 return Err(VmError::VmAlreadyCreated); 779 } 780 781 let source_url = restore_cfg.source_url.as_path().to_str(); 782 if source_url.is_none() { 783 return Err(VmError::InvalidRestoreSourceUrl); 784 } 785 // Safe to unwrap as we checked it was Some(&str). 786 let source_url = source_url.unwrap(); 787 788 let vm_config = Arc::new(Mutex::new( 789 recv_vm_config(source_url).map_err(VmError::Restore)?, 790 )); 791 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 792 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 793 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 794 795 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 796 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 797 .map_err(VmError::Restore)?; 798 799 self.vm_config = Some(Arc::clone(&vm_config)); 800 801 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 802 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 803 #[cfg(feature = "guest_debug")] 804 let debug_evt = self 805 .vm_debug_evt 806 .try_clone() 807 .map_err(VmError::EventFdClone)?; 808 let activate_evt = self 809 .activate_evt 810 .try_clone() 811 .map_err(VmError::EventFdClone)?; 812 813 let vm = Vm::new( 814 vm_config, 815 exit_evt, 816 reset_evt, 817 #[cfg(feature = "guest_debug")] 818 debug_evt, 819 &self.seccomp_action, 820 self.hypervisor.clone(), 821 activate_evt, 822 None, 823 None, 824 None, 825 Arc::clone(&self.original_termios_opt), 826 Some(snapshot), 827 Some(source_url), 828 Some(restore_cfg.prefault), 829 )?; 830 self.vm = Some(vm); 831 832 // Now we can restore the rest of the VM. 833 if let Some(ref mut vm) = self.vm { 834 vm.restore() 835 } else { 836 Err(VmError::VmNotCreated) 837 } 838 } 839 840 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 841 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 842 if let Some(ref mut vm) = self.vm { 843 vm.coredump(destination_url).map_err(VmError::Coredump) 844 } else { 845 Err(VmError::VmNotRunning) 846 } 847 } 848 849 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 850 if let Some(ref mut vm) = self.vm.take() { 851 vm.shutdown() 852 } else { 853 Err(VmError::VmNotRunning) 854 } 855 } 856 857 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 858 // First we stop the current VM 859 let (config, serial_pty, console_pty, console_resize_pipe) = 860 if let Some(mut vm) = self.vm.take() { 861 let config = vm.get_config(); 862 let serial_pty = vm.serial_pty(); 863 let console_pty = vm.console_pty(); 864 let console_resize_pipe = vm 865 .console_resize_pipe() 866 .as_ref() 867 .map(|pipe| pipe.try_clone().unwrap()); 868 vm.shutdown()?; 869 (config, serial_pty, console_pty, console_resize_pipe) 870 } else { 871 return Err(VmError::VmNotCreated); 872 }; 873 874 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 875 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 876 #[cfg(feature = "guest_debug")] 877 let debug_evt = self 878 .vm_debug_evt 879 .try_clone() 880 .map_err(VmError::EventFdClone)?; 881 let activate_evt = self 882 .activate_evt 883 .try_clone() 884 .map_err(VmError::EventFdClone)?; 885 886 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 887 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 888 // during the boot process. 889 if self.reset_evt.read().is_ok() { 890 warn!("Spurious second reset event received. Ignoring."); 891 } 892 893 // Then we create the new VM 894 let mut vm = Vm::new( 895 config, 896 exit_evt, 897 reset_evt, 898 #[cfg(feature = "guest_debug")] 899 debug_evt, 900 &self.seccomp_action, 901 self.hypervisor.clone(), 902 activate_evt, 903 serial_pty, 904 console_pty, 905 console_resize_pipe, 906 Arc::clone(&self.original_termios_opt), 907 None, 908 None, 909 None, 910 )?; 911 912 // And we boot it 913 vm.boot()?; 914 915 self.vm = Some(vm); 916 917 Ok(()) 918 } 919 920 fn vm_info(&self) -> result::Result<VmInfo, VmError> { 921 match &self.vm_config { 922 Some(config) => { 923 let state = match &self.vm { 924 Some(vm) => vm.get_state()?, 925 None => VmState::Created, 926 }; 927 928 let config = Arc::clone(config); 929 930 let mut memory_actual_size = config.lock().unwrap().memory.total_size(); 931 if let Some(vm) = &self.vm { 932 memory_actual_size -= vm.balloon_size(); 933 } 934 935 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree()); 936 937 Ok(VmInfo { 938 config, 939 state, 940 memory_actual_size, 941 device_tree, 942 }) 943 } 944 None => Err(VmError::VmNotCreated), 945 } 946 } 947 948 fn vmm_ping(&self) -> VmmPingResponse { 949 let VmmVersionInfo { 950 build_version, 951 version, 952 } = self.version.clone(); 953 954 VmmPingResponse { 955 build_version, 956 version, 957 pid: std::process::id() as i64, 958 features: feature_list(), 959 } 960 } 961 962 fn vm_delete(&mut self) -> result::Result<(), VmError> { 963 if self.vm_config.is_none() { 964 return Ok(()); 965 } 966 967 // If a VM is booted, we first try to shut it down. 968 if self.vm.is_some() { 969 self.vm_shutdown()?; 970 } 971 972 self.vm_config = None; 973 974 event!("vm", "deleted"); 975 976 Ok(()) 977 } 978 979 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 980 self.vm_delete()?; 981 event!("vmm", "shutdown"); 982 Ok(()) 983 } 984 985 fn vm_resize( 986 &mut self, 987 desired_vcpus: Option<u8>, 988 desired_ram: Option<u64>, 989 desired_balloon: Option<u64>, 990 ) -> result::Result<(), VmError> { 991 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 992 993 if let Some(ref mut vm) = self.vm { 994 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 995 error!("Error when resizing VM: {:?}", e); 996 Err(e) 997 } else { 998 Ok(()) 999 } 1000 } else { 1001 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1002 if let Some(desired_vcpus) = desired_vcpus { 1003 config.cpus.boot_vcpus = desired_vcpus; 1004 } 1005 if let Some(desired_ram) = desired_ram { 1006 config.memory.size = desired_ram; 1007 } 1008 if let Some(desired_balloon) = desired_balloon { 1009 if let Some(balloon_config) = &mut config.balloon { 1010 balloon_config.size = desired_balloon; 1011 } 1012 } 1013 Ok(()) 1014 } 1015 } 1016 1017 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1018 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1019 1020 if let Some(ref mut vm) = self.vm { 1021 if let Err(e) = vm.resize_zone(id, desired_ram) { 1022 error!("Error when resizing VM: {:?}", e); 1023 Err(e) 1024 } else { 1025 Ok(()) 1026 } 1027 } else { 1028 // Update VmConfig by setting the new desired ram. 1029 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1030 1031 if let Some(zones) = &mut memory_config.zones { 1032 for zone in zones.iter_mut() { 1033 if zone.id == id { 1034 zone.size = desired_ram; 1035 return Ok(()); 1036 } 1037 } 1038 } 1039 1040 error!("Could not find the memory zone {} for the resize", id); 1041 Err(VmError::ResizeZone) 1042 } 1043 } 1044 1045 fn vm_add_device( 1046 &mut self, 1047 device_cfg: DeviceConfig, 1048 ) -> result::Result<Option<Vec<u8>>, VmError> { 1049 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1050 1051 { 1052 // Validate the configuration change in a cloned configuration 1053 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1054 add_to_config(&mut config.devices, device_cfg.clone()); 1055 config.validate().map_err(VmError::ConfigValidation)?; 1056 } 1057 1058 if let Some(ref mut vm) = self.vm { 1059 let info = vm.add_device(device_cfg).map_err(|e| { 1060 error!("Error when adding new device to the VM: {:?}", e); 1061 e 1062 })?; 1063 serde_json::to_vec(&info) 1064 .map(Some) 1065 .map_err(VmError::SerializeJson) 1066 } else { 1067 // Update VmConfig by adding the new device. 1068 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1069 add_to_config(&mut config.devices, device_cfg); 1070 Ok(None) 1071 } 1072 } 1073 1074 fn vm_add_user_device( 1075 &mut self, 1076 device_cfg: UserDeviceConfig, 1077 ) -> result::Result<Option<Vec<u8>>, VmError> { 1078 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1079 1080 { 1081 // Validate the configuration change in a cloned configuration 1082 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1083 add_to_config(&mut config.user_devices, device_cfg.clone()); 1084 config.validate().map_err(VmError::ConfigValidation)?; 1085 } 1086 1087 if let Some(ref mut vm) = self.vm { 1088 let info = vm.add_user_device(device_cfg).map_err(|e| { 1089 error!("Error when adding new user device to the VM: {:?}", e); 1090 e 1091 })?; 1092 serde_json::to_vec(&info) 1093 .map(Some) 1094 .map_err(VmError::SerializeJson) 1095 } else { 1096 // Update VmConfig by adding the new device. 1097 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1098 add_to_config(&mut config.user_devices, device_cfg); 1099 Ok(None) 1100 } 1101 } 1102 1103 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1104 if let Some(ref mut vm) = self.vm { 1105 if let Err(e) = vm.remove_device(id) { 1106 error!("Error when removing device from the VM: {:?}", e); 1107 Err(e) 1108 } else { 1109 Ok(()) 1110 } 1111 } else if let Some(ref config) = self.vm_config { 1112 let mut config = config.lock().unwrap(); 1113 if config.remove_device(&id) { 1114 Ok(()) 1115 } else { 1116 Err(VmError::NoDeviceToRemove(id)) 1117 } 1118 } else { 1119 Err(VmError::VmNotCreated) 1120 } 1121 } 1122 1123 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1124 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1125 1126 { 1127 // Validate the configuration change in a cloned configuration 1128 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1129 add_to_config(&mut config.disks, disk_cfg.clone()); 1130 config.validate().map_err(VmError::ConfigValidation)?; 1131 } 1132 1133 if let Some(ref mut vm) = self.vm { 1134 let info = vm.add_disk(disk_cfg).map_err(|e| { 1135 error!("Error when adding new disk to the VM: {:?}", e); 1136 e 1137 })?; 1138 serde_json::to_vec(&info) 1139 .map(Some) 1140 .map_err(VmError::SerializeJson) 1141 } else { 1142 // Update VmConfig by adding the new device. 1143 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1144 add_to_config(&mut config.disks, disk_cfg); 1145 Ok(None) 1146 } 1147 } 1148 1149 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1150 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1151 1152 { 1153 // Validate the configuration change in a cloned configuration 1154 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1155 add_to_config(&mut config.fs, fs_cfg.clone()); 1156 config.validate().map_err(VmError::ConfigValidation)?; 1157 } 1158 1159 if let Some(ref mut vm) = self.vm { 1160 let info = vm.add_fs(fs_cfg).map_err(|e| { 1161 error!("Error when adding new fs to the VM: {:?}", e); 1162 e 1163 })?; 1164 serde_json::to_vec(&info) 1165 .map(Some) 1166 .map_err(VmError::SerializeJson) 1167 } else { 1168 // Update VmConfig by adding the new device. 1169 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1170 add_to_config(&mut config.fs, fs_cfg); 1171 Ok(None) 1172 } 1173 } 1174 1175 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1176 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1177 1178 { 1179 // Validate the configuration change in a cloned configuration 1180 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1181 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1182 config.validate().map_err(VmError::ConfigValidation)?; 1183 } 1184 1185 if let Some(ref mut vm) = self.vm { 1186 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1187 error!("Error when adding new pmem device to the VM: {:?}", e); 1188 e 1189 })?; 1190 serde_json::to_vec(&info) 1191 .map(Some) 1192 .map_err(VmError::SerializeJson) 1193 } else { 1194 // Update VmConfig by adding the new device. 1195 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1196 add_to_config(&mut config.pmem, pmem_cfg); 1197 Ok(None) 1198 } 1199 } 1200 1201 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1202 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1203 1204 { 1205 // Validate the configuration change in a cloned configuration 1206 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1207 add_to_config(&mut config.net, net_cfg.clone()); 1208 config.validate().map_err(VmError::ConfigValidation)?; 1209 } 1210 1211 if let Some(ref mut vm) = self.vm { 1212 let info = vm.add_net(net_cfg).map_err(|e| { 1213 error!("Error when adding new network device to the VM: {:?}", e); 1214 e 1215 })?; 1216 serde_json::to_vec(&info) 1217 .map(Some) 1218 .map_err(VmError::SerializeJson) 1219 } else { 1220 // Update VmConfig by adding the new device. 1221 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1222 add_to_config(&mut config.net, net_cfg); 1223 Ok(None) 1224 } 1225 } 1226 1227 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1228 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1229 1230 { 1231 // Validate the configuration change in a cloned configuration 1232 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1233 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1234 config.validate().map_err(VmError::ConfigValidation)?; 1235 } 1236 1237 if let Some(ref mut vm) = self.vm { 1238 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1239 error!("Error when adding new vDPA device to the VM: {:?}", e); 1240 e 1241 })?; 1242 serde_json::to_vec(&info) 1243 .map(Some) 1244 .map_err(VmError::SerializeJson) 1245 } else { 1246 // Update VmConfig by adding the new device. 1247 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1248 add_to_config(&mut config.vdpa, vdpa_cfg); 1249 Ok(None) 1250 } 1251 } 1252 1253 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1254 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1255 1256 { 1257 // Validate the configuration change in a cloned configuration 1258 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1259 1260 if config.vsock.is_some() { 1261 return Err(VmError::TooManyVsockDevices); 1262 } 1263 1264 config.vsock = Some(vsock_cfg.clone()); 1265 config.validate().map_err(VmError::ConfigValidation)?; 1266 } 1267 1268 if let Some(ref mut vm) = self.vm { 1269 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1270 error!("Error when adding new vsock device to the VM: {:?}", e); 1271 e 1272 })?; 1273 serde_json::to_vec(&info) 1274 .map(Some) 1275 .map_err(VmError::SerializeJson) 1276 } else { 1277 // Update VmConfig by adding the new device. 1278 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1279 config.vsock = Some(vsock_cfg); 1280 Ok(None) 1281 } 1282 } 1283 1284 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1285 if let Some(ref mut vm) = self.vm { 1286 let info = vm.counters().map_err(|e| { 1287 error!("Error when getting counters from the VM: {:?}", e); 1288 e 1289 })?; 1290 serde_json::to_vec(&info) 1291 .map(Some) 1292 .map_err(VmError::SerializeJson) 1293 } else { 1294 Err(VmError::VmNotRunning) 1295 } 1296 } 1297 1298 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1299 if let Some(ref mut vm) = self.vm { 1300 vm.power_button() 1301 } else { 1302 Err(VmError::VmNotRunning) 1303 } 1304 } 1305 1306 fn vm_receive_config<T>( 1307 &mut self, 1308 req: &Request, 1309 socket: &mut T, 1310 existing_memory_files: Option<HashMap<u32, File>>, 1311 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 1312 where 1313 T: Read + Write, 1314 { 1315 // Read in config data along with memory manager data 1316 let mut data: Vec<u8> = Vec::new(); 1317 data.resize_with(req.length() as usize, Default::default); 1318 socket 1319 .read_exact(&mut data) 1320 .map_err(MigratableError::MigrateSocket)?; 1321 1322 let vm_migration_config: VmMigrationConfig = 1323 serde_json::from_slice(&data).map_err(|e| { 1324 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 1325 })?; 1326 1327 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1328 self.vm_check_cpuid_compatibility( 1329 &vm_migration_config.vm_config, 1330 &vm_migration_config.common_cpuid, 1331 )?; 1332 1333 let config = vm_migration_config.vm_config.clone(); 1334 self.vm_config = Some(vm_migration_config.vm_config); 1335 1336 let vm = Vm::create_hypervisor_vm( 1337 &self.hypervisor, 1338 #[cfg(feature = "tdx")] 1339 false, 1340 #[cfg(feature = "sev_snp")] 1341 false, 1342 ) 1343 .map_err(|e| { 1344 MigratableError::MigrateReceive(anyhow!( 1345 "Error creating hypervisor VM from snapshot: {:?}", 1346 e 1347 )) 1348 })?; 1349 1350 let phys_bits = 1351 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 1352 1353 let memory_manager = MemoryManager::new( 1354 vm, 1355 &config.lock().unwrap().memory.clone(), 1356 None, 1357 phys_bits, 1358 #[cfg(feature = "tdx")] 1359 false, 1360 Some(&vm_migration_config.memory_manager_data), 1361 existing_memory_files, 1362 #[cfg(target_arch = "x86_64")] 1363 None, 1364 ) 1365 .map_err(|e| { 1366 MigratableError::MigrateReceive(anyhow!( 1367 "Error creating MemoryManager from snapshot: {:?}", 1368 e 1369 )) 1370 })?; 1371 1372 Response::ok().write_to(socket)?; 1373 1374 Ok(memory_manager) 1375 } 1376 1377 fn vm_receive_state<T>( 1378 &mut self, 1379 req: &Request, 1380 socket: &mut T, 1381 mm: Arc<Mutex<MemoryManager>>, 1382 ) -> std::result::Result<(), MigratableError> 1383 where 1384 T: Read + Write, 1385 { 1386 // Read in state data 1387 let mut data: Vec<u8> = Vec::new(); 1388 data.resize_with(req.length() as usize, Default::default); 1389 socket 1390 .read_exact(&mut data) 1391 .map_err(MigratableError::MigrateSocket)?; 1392 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 1393 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 1394 })?; 1395 1396 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 1397 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 1398 })?; 1399 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 1400 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 1401 })?; 1402 #[cfg(feature = "guest_debug")] 1403 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 1404 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 1405 })?; 1406 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 1407 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 1408 })?; 1409 1410 let timestamp = Instant::now(); 1411 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 1412 let mut vm = Vm::new_from_memory_manager( 1413 self.vm_config.clone().unwrap(), 1414 mm, 1415 hypervisor_vm, 1416 exit_evt, 1417 reset_evt, 1418 #[cfg(feature = "guest_debug")] 1419 debug_evt, 1420 &self.seccomp_action, 1421 self.hypervisor.clone(), 1422 activate_evt, 1423 timestamp, 1424 None, 1425 None, 1426 None, 1427 Arc::clone(&self.original_termios_opt), 1428 Some(snapshot), 1429 ) 1430 .map_err(|e| { 1431 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 1432 })?; 1433 1434 // Create VM 1435 vm.restore().map_err(|e| { 1436 Response::error().write_to(socket).ok(); 1437 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 1438 })?; 1439 self.vm = Some(vm); 1440 1441 Response::ok().write_to(socket)?; 1442 1443 Ok(()) 1444 } 1445 1446 fn vm_receive_memory<T>( 1447 &mut self, 1448 req: &Request, 1449 socket: &mut T, 1450 memory_manager: &mut MemoryManager, 1451 ) -> std::result::Result<(), MigratableError> 1452 where 1453 T: Read + ReadVolatile + Write, 1454 { 1455 // Read table 1456 let table = MemoryRangeTable::read_from(socket, req.length())?; 1457 1458 // And then read the memory itself 1459 memory_manager 1460 .receive_memory_regions(&table, socket) 1461 .map_err(|e| { 1462 Response::error().write_to(socket).ok(); 1463 e 1464 })?; 1465 Response::ok().write_to(socket)?; 1466 Ok(()) 1467 } 1468 1469 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 1470 url.strip_prefix("unix:") 1471 .ok_or_else(|| { 1472 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 1473 }) 1474 .map(|s| s.into()) 1475 } 1476 1477 fn vm_receive_migration( 1478 &mut self, 1479 receive_data_migration: VmReceiveMigrationData, 1480 ) -> result::Result<(), MigratableError> { 1481 info!( 1482 "Receiving migration: receiver_url = {}", 1483 receive_data_migration.receiver_url 1484 ); 1485 1486 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1487 let listener = UnixListener::bind(&path).map_err(|e| { 1488 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1489 })?; 1490 let (mut socket, _addr) = listener.accept().map_err(|e| { 1491 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1492 })?; 1493 std::fs::remove_file(&path).map_err(|e| { 1494 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1495 })?; 1496 1497 let mut started = false; 1498 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1499 let mut existing_memory_files = None; 1500 loop { 1501 let req = Request::read_from(&mut socket)?; 1502 match req.command() { 1503 Command::Invalid => info!("Invalid Command Received"), 1504 Command::Start => { 1505 info!("Start Command Received"); 1506 started = true; 1507 1508 Response::ok().write_to(&mut socket)?; 1509 } 1510 Command::Config => { 1511 info!("Config Command Received"); 1512 1513 if !started { 1514 warn!("Migration not started yet"); 1515 Response::error().write_to(&mut socket)?; 1516 continue; 1517 } 1518 memory_manager = Some(self.vm_receive_config( 1519 &req, 1520 &mut socket, 1521 existing_memory_files.take(), 1522 )?); 1523 } 1524 Command::State => { 1525 info!("State Command Received"); 1526 1527 if !started { 1528 warn!("Migration not started yet"); 1529 Response::error().write_to(&mut socket)?; 1530 continue; 1531 } 1532 if let Some(mm) = memory_manager.take() { 1533 self.vm_receive_state(&req, &mut socket, mm)?; 1534 } else { 1535 warn!("Configuration not sent yet"); 1536 Response::error().write_to(&mut socket)?; 1537 } 1538 } 1539 Command::Memory => { 1540 info!("Memory Command Received"); 1541 1542 if !started { 1543 warn!("Migration not started yet"); 1544 Response::error().write_to(&mut socket)?; 1545 continue; 1546 } 1547 if let Some(mm) = memory_manager.as_ref() { 1548 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 1549 } else { 1550 warn!("Configuration not sent yet"); 1551 Response::error().write_to(&mut socket)?; 1552 } 1553 } 1554 Command::MemoryFd => { 1555 info!("MemoryFd Command Received"); 1556 1557 if !started { 1558 warn!("Migration not started yet"); 1559 Response::error().write_to(&mut socket)?; 1560 continue; 1561 } 1562 1563 let mut buf = [0u8; 4]; 1564 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 1565 MigratableError::MigrateReceive(anyhow!( 1566 "Error receiving slot from socket: {}", 1567 e 1568 )) 1569 })?; 1570 1571 if existing_memory_files.is_none() { 1572 existing_memory_files = Some(HashMap::default()) 1573 } 1574 1575 if let Some(ref mut existing_memory_files) = existing_memory_files { 1576 let slot = u32::from_le_bytes(buf); 1577 existing_memory_files.insert(slot, file.unwrap()); 1578 } 1579 1580 Response::ok().write_to(&mut socket)?; 1581 } 1582 Command::Complete => { 1583 info!("Complete Command Received"); 1584 if let Some(ref mut vm) = self.vm.as_mut() { 1585 vm.resume()?; 1586 Response::ok().write_to(&mut socket)?; 1587 } else { 1588 warn!("VM not created yet"); 1589 Response::error().write_to(&mut socket)?; 1590 } 1591 break; 1592 } 1593 Command::Abandon => { 1594 info!("Abandon Command Received"); 1595 self.vm = None; 1596 self.vm_config = None; 1597 Response::ok().write_to(&mut socket).ok(); 1598 break; 1599 } 1600 } 1601 } 1602 1603 Ok(()) 1604 } 1605 1606 // Returns true if there were dirty pages to send 1607 fn vm_maybe_send_dirty_pages<T>( 1608 vm: &mut Vm, 1609 socket: &mut T, 1610 ) -> result::Result<bool, MigratableError> 1611 where 1612 T: Read + Write + WriteVolatile, 1613 { 1614 // Send (dirty) memory table 1615 let table = vm.dirty_log()?; 1616 1617 // But if there are no regions go straight to pause 1618 if table.regions().is_empty() { 1619 return Ok(false); 1620 } 1621 1622 Request::memory(table.length()).write_to(socket).unwrap(); 1623 table.write_to(socket)?; 1624 // And then the memory itself 1625 vm.send_memory_regions(&table, socket)?; 1626 let res = Response::read_from(socket)?; 1627 if res.status() != Status::Ok { 1628 warn!("Error during dirty memory migration"); 1629 Request::abandon().write_to(socket)?; 1630 Response::read_from(socket).ok(); 1631 return Err(MigratableError::MigrateSend(anyhow!( 1632 "Error during dirty memory migration" 1633 ))); 1634 } 1635 1636 Ok(true) 1637 } 1638 1639 fn send_migration( 1640 vm: &mut Vm, 1641 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 1642 dyn hypervisor::Hypervisor, 1643 >, 1644 send_data_migration: VmSendMigrationData, 1645 ) -> result::Result<(), MigratableError> { 1646 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 1647 let mut socket = UnixStream::connect(path).map_err(|e| { 1648 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 1649 })?; 1650 1651 // Start the migration 1652 Request::start().write_to(&mut socket)?; 1653 let res = Response::read_from(&mut socket)?; 1654 if res.status() != Status::Ok { 1655 warn!("Error starting migration"); 1656 Request::abandon().write_to(&mut socket)?; 1657 Response::read_from(&mut socket).ok(); 1658 return Err(MigratableError::MigrateSend(anyhow!( 1659 "Error starting migration" 1660 ))); 1661 } 1662 1663 // Send config 1664 let vm_config = vm.get_config(); 1665 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1666 let common_cpuid = { 1667 #[cfg(feature = "tdx")] 1668 let tdx = vm_config.lock().unwrap().is_tdx_enabled(); 1669 let amx = vm_config.lock().unwrap().cpus.features.amx; 1670 let phys_bits = 1671 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 1672 arch::generate_common_cpuid( 1673 &hypervisor, 1674 &arch::CpuidConfig { 1675 sgx_epc_sections: None, 1676 phys_bits, 1677 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 1678 #[cfg(feature = "tdx")] 1679 tdx, 1680 amx, 1681 }, 1682 ) 1683 .map_err(|e| { 1684 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid': {:?}", e)) 1685 })? 1686 }; 1687 1688 if send_data_migration.local { 1689 vm.send_memory_fds(&mut socket)?; 1690 } 1691 1692 let vm_migration_config = VmMigrationConfig { 1693 vm_config, 1694 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1695 common_cpuid, 1696 memory_manager_data: vm.memory_manager_data(), 1697 }; 1698 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1699 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1700 socket 1701 .write_all(&config_data) 1702 .map_err(MigratableError::MigrateSocket)?; 1703 let res = Response::read_from(&mut socket)?; 1704 if res.status() != Status::Ok { 1705 warn!("Error during config migration"); 1706 Request::abandon().write_to(&mut socket)?; 1707 Response::read_from(&mut socket).ok(); 1708 return Err(MigratableError::MigrateSend(anyhow!( 1709 "Error during config migration" 1710 ))); 1711 } 1712 1713 // Let every Migratable object know about the migration being started. 1714 vm.start_migration()?; 1715 1716 if send_data_migration.local { 1717 // Now pause VM 1718 vm.pause()?; 1719 } else { 1720 // Start logging dirty pages 1721 vm.start_dirty_log()?; 1722 1723 // Send memory table 1724 let table = vm.memory_range_table()?; 1725 Request::memory(table.length()) 1726 .write_to(&mut socket) 1727 .unwrap(); 1728 table.write_to(&mut socket)?; 1729 // And then the memory itself 1730 vm.send_memory_regions(&table, &mut socket)?; 1731 let res = Response::read_from(&mut socket)?; 1732 if res.status() != Status::Ok { 1733 warn!("Error during memory migration"); 1734 Request::abandon().write_to(&mut socket)?; 1735 Response::read_from(&mut socket).ok(); 1736 return Err(MigratableError::MigrateSend(anyhow!( 1737 "Error during memory migration" 1738 ))); 1739 } 1740 1741 // Try at most 5 passes of dirty memory sending 1742 const MAX_DIRTY_MIGRATIONS: usize = 5; 1743 for i in 0..MAX_DIRTY_MIGRATIONS { 1744 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1745 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1746 break; 1747 } 1748 } 1749 1750 // Now pause VM 1751 vm.pause()?; 1752 1753 // Send last batch of dirty pages 1754 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1755 1756 // Stop logging dirty pages 1757 vm.stop_dirty_log()?; 1758 } 1759 // Capture snapshot and send it 1760 let vm_snapshot = vm.snapshot()?; 1761 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1762 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1763 socket 1764 .write_all(&snapshot_data) 1765 .map_err(MigratableError::MigrateSocket)?; 1766 let res = Response::read_from(&mut socket)?; 1767 if res.status() != Status::Ok { 1768 warn!("Error during state migration"); 1769 Request::abandon().write_to(&mut socket)?; 1770 Response::read_from(&mut socket).ok(); 1771 return Err(MigratableError::MigrateSend(anyhow!( 1772 "Error during state migration" 1773 ))); 1774 } 1775 1776 // Complete the migration 1777 Request::complete().write_to(&mut socket)?; 1778 let res = Response::read_from(&mut socket)?; 1779 if res.status() != Status::Ok { 1780 warn!("Error completing migration"); 1781 Request::abandon().write_to(&mut socket)?; 1782 Response::read_from(&mut socket).ok(); 1783 return Err(MigratableError::MigrateSend(anyhow!( 1784 "Error completing migration" 1785 ))); 1786 } 1787 info!("Migration complete"); 1788 1789 // Let every Migratable object know about the migration being complete 1790 vm.complete_migration() 1791 } 1792 1793 fn vm_send_migration( 1794 &mut self, 1795 send_data_migration: VmSendMigrationData, 1796 ) -> result::Result<(), MigratableError> { 1797 info!( 1798 "Sending migration: destination_url = {}, local = {}", 1799 send_data_migration.destination_url, send_data_migration.local 1800 ); 1801 1802 if !self 1803 .vm_config 1804 .as_ref() 1805 .unwrap() 1806 .lock() 1807 .unwrap() 1808 .backed_by_shared_memory() 1809 && send_data_migration.local 1810 { 1811 return Err(MigratableError::MigrateSend(anyhow!( 1812 "Local migration requires shared memory or hugepages enabled" 1813 ))); 1814 } 1815 1816 if let Some(vm) = self.vm.as_mut() { 1817 Self::send_migration( 1818 vm, 1819 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1820 self.hypervisor.clone(), 1821 send_data_migration, 1822 ) 1823 .map_err(|migration_err| { 1824 error!("Migration failed: {:?}", migration_err); 1825 1826 // Stop logging dirty pages 1827 if let Err(e) = vm.stop_dirty_log() { 1828 return e; 1829 } 1830 1831 if vm.get_state().unwrap() == VmState::Paused { 1832 if let Err(e) = vm.resume() { 1833 return e; 1834 } 1835 } 1836 1837 migration_err 1838 })?; 1839 1840 // Shutdown the VM after the migration succeeded 1841 self.exit_evt.write(1).map_err(|e| { 1842 MigratableError::MigrateSend(anyhow!( 1843 "Failed shutting down the VM after migration: {:?}", 1844 e 1845 )) 1846 }) 1847 } else { 1848 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 1849 } 1850 } 1851 1852 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1853 fn vm_check_cpuid_compatibility( 1854 &self, 1855 src_vm_config: &Arc<Mutex<VmConfig>>, 1856 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1857 ) -> result::Result<(), MigratableError> { 1858 // We check the `CPUID` compatibility of between the source vm and destination, which is 1859 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1860 let dest_cpuid = &{ 1861 let vm_config = &src_vm_config.lock().unwrap(); 1862 1863 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1864 arch::generate_common_cpuid( 1865 &self.hypervisor.clone(), 1866 &arch::CpuidConfig { 1867 sgx_epc_sections: None, 1868 phys_bits, 1869 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1870 #[cfg(feature = "tdx")] 1871 tdx: vm_config.is_tdx_enabled(), 1872 amx: vm_config.cpus.features.amx, 1873 }, 1874 ) 1875 .map_err(|e| { 1876 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1877 })? 1878 }; 1879 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1880 MigratableError::MigrateReceive(anyhow!( 1881 "Error checking cpu feature compatibility': {:?}", 1882 e 1883 )) 1884 }) 1885 } 1886 1887 fn control_loop( 1888 &mut self, 1889 api_receiver: Rc<Receiver<ApiRequest>>, 1890 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1891 ) -> Result<()> { 1892 const EPOLL_EVENTS_LEN: usize = 100; 1893 1894 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1895 let epoll_fd = self.epoll.as_raw_fd(); 1896 1897 'outer: loop { 1898 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1899 Ok(res) => res, 1900 Err(e) => { 1901 if e.kind() == io::ErrorKind::Interrupted { 1902 // It's well defined from the epoll_wait() syscall 1903 // documentation that the epoll loop can be interrupted 1904 // before any of the requested events occurred or the 1905 // timeout expired. In both those cases, epoll_wait() 1906 // returns an error of type EINTR, but this should not 1907 // be considered as a regular error. Instead it is more 1908 // appropriate to retry, by calling into epoll_wait(). 1909 continue; 1910 } 1911 return Err(Error::Epoll(e)); 1912 } 1913 }; 1914 1915 for event in events.iter().take(num_events) { 1916 let dispatch_event: EpollDispatch = event.data.into(); 1917 match dispatch_event { 1918 EpollDispatch::Unknown => { 1919 let event = event.data; 1920 warn!("Unknown VMM loop event: {}", event); 1921 } 1922 EpollDispatch::Exit => { 1923 info!("VM exit event"); 1924 // Consume the event. 1925 self.exit_evt.read().map_err(Error::EventFdRead)?; 1926 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1927 1928 break 'outer; 1929 } 1930 EpollDispatch::Reset => { 1931 info!("VM reset event"); 1932 // Consume the event. 1933 self.reset_evt.read().map_err(Error::EventFdRead)?; 1934 self.vm_reboot().map_err(Error::VmReboot)?; 1935 } 1936 EpollDispatch::ActivateVirtioDevices => { 1937 if let Some(ref vm) = self.vm { 1938 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1939 info!( 1940 "Trying to activate pending virtio devices: count = {}", 1941 count 1942 ); 1943 vm.activate_virtio_devices() 1944 .map_err(Error::ActivateVirtioDevices)?; 1945 } 1946 } 1947 EpollDispatch::Api => { 1948 // Consume the events. 1949 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1950 // Read from the API receiver channel 1951 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1952 1953 info!("API request event: {:?}", api_request); 1954 match api_request { 1955 ApiRequest::VmCreate(config, sender) => { 1956 let response = self 1957 .vm_create(config) 1958 .map_err(ApiError::VmCreate) 1959 .map(|_| ApiResponsePayload::Empty); 1960 1961 sender.send(response).map_err(Error::ApiResponseSend)?; 1962 } 1963 ApiRequest::VmDelete(sender) => { 1964 let response = self 1965 .vm_delete() 1966 .map_err(ApiError::VmDelete) 1967 .map(|_| ApiResponsePayload::Empty); 1968 1969 sender.send(response).map_err(Error::ApiResponseSend)?; 1970 } 1971 ApiRequest::VmBoot(sender) => { 1972 let response = self 1973 .vm_boot() 1974 .map_err(ApiError::VmBoot) 1975 .map(|_| ApiResponsePayload::Empty); 1976 1977 sender.send(response).map_err(Error::ApiResponseSend)?; 1978 } 1979 ApiRequest::VmShutdown(sender) => { 1980 let response = self 1981 .vm_shutdown() 1982 .map_err(ApiError::VmShutdown) 1983 .map(|_| ApiResponsePayload::Empty); 1984 1985 sender.send(response).map_err(Error::ApiResponseSend)?; 1986 } 1987 ApiRequest::VmReboot(sender) => { 1988 let response = self 1989 .vm_reboot() 1990 .map_err(ApiError::VmReboot) 1991 .map(|_| ApiResponsePayload::Empty); 1992 1993 sender.send(response).map_err(Error::ApiResponseSend)?; 1994 } 1995 ApiRequest::VmInfo(sender) => { 1996 let response = self 1997 .vm_info() 1998 .map_err(ApiError::VmInfo) 1999 .map(ApiResponsePayload::VmInfo); 2000 2001 sender.send(response).map_err(Error::ApiResponseSend)?; 2002 } 2003 ApiRequest::VmmPing(sender) => { 2004 let response = ApiResponsePayload::VmmPing(self.vmm_ping()); 2005 2006 sender.send(Ok(response)).map_err(Error::ApiResponseSend)?; 2007 } 2008 ApiRequest::VmPause(sender) => { 2009 let response = self 2010 .vm_pause() 2011 .map_err(ApiError::VmPause) 2012 .map(|_| ApiResponsePayload::Empty); 2013 2014 sender.send(response).map_err(Error::ApiResponseSend)?; 2015 } 2016 ApiRequest::VmResume(sender) => { 2017 let response = self 2018 .vm_resume() 2019 .map_err(ApiError::VmResume) 2020 .map(|_| ApiResponsePayload::Empty); 2021 2022 sender.send(response).map_err(Error::ApiResponseSend)?; 2023 } 2024 ApiRequest::VmSnapshot(snapshot_data, sender) => { 2025 let response = self 2026 .vm_snapshot(&snapshot_data.destination_url) 2027 .map_err(ApiError::VmSnapshot) 2028 .map(|_| ApiResponsePayload::Empty); 2029 2030 sender.send(response).map_err(Error::ApiResponseSend)?; 2031 } 2032 ApiRequest::VmRestore(restore_data, sender) => { 2033 let response = self 2034 .vm_restore(restore_data.as_ref().clone()) 2035 .map_err(ApiError::VmRestore) 2036 .map(|_| ApiResponsePayload::Empty); 2037 2038 sender.send(response).map_err(Error::ApiResponseSend)?; 2039 } 2040 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 2041 ApiRequest::VmCoredump(coredump_data, sender) => { 2042 let response = self 2043 .vm_coredump(&coredump_data.destination_url) 2044 .map_err(ApiError::VmCoredump) 2045 .map(|_| ApiResponsePayload::Empty); 2046 2047 sender.send(response).map_err(Error::ApiResponseSend)?; 2048 } 2049 ApiRequest::VmmShutdown(sender) => { 2050 let response = self 2051 .vmm_shutdown() 2052 .map_err(ApiError::VmmShutdown) 2053 .map(|_| ApiResponsePayload::Empty); 2054 2055 sender.send(response).map_err(Error::ApiResponseSend)?; 2056 2057 break 'outer; 2058 } 2059 ApiRequest::VmResize(resize_data, sender) => { 2060 let response = self 2061 .vm_resize( 2062 resize_data.desired_vcpus, 2063 resize_data.desired_ram, 2064 resize_data.desired_balloon, 2065 ) 2066 .map_err(ApiError::VmResize) 2067 .map(|_| ApiResponsePayload::Empty); 2068 sender.send(response).map_err(Error::ApiResponseSend)?; 2069 } 2070 ApiRequest::VmResizeZone(resize_zone_data, sender) => { 2071 let response = self 2072 .vm_resize_zone( 2073 resize_zone_data.id.clone(), 2074 resize_zone_data.desired_ram, 2075 ) 2076 .map_err(ApiError::VmResizeZone) 2077 .map(|_| ApiResponsePayload::Empty); 2078 sender.send(response).map_err(Error::ApiResponseSend)?; 2079 } 2080 ApiRequest::VmAddDevice(add_device_data, sender) => { 2081 let response = self 2082 .vm_add_device(add_device_data.as_ref().clone()) 2083 .map_err(ApiError::VmAddDevice) 2084 .map(ApiResponsePayload::VmAction); 2085 sender.send(response).map_err(Error::ApiResponseSend)?; 2086 } 2087 ApiRequest::VmAddUserDevice(add_device_data, sender) => { 2088 let response = self 2089 .vm_add_user_device(add_device_data.as_ref().clone()) 2090 .map_err(ApiError::VmAddUserDevice) 2091 .map(ApiResponsePayload::VmAction); 2092 sender.send(response).map_err(Error::ApiResponseSend)?; 2093 } 2094 ApiRequest::VmRemoveDevice(remove_device_data, sender) => { 2095 let response = self 2096 .vm_remove_device(remove_device_data.id.clone()) 2097 .map_err(ApiError::VmRemoveDevice) 2098 .map(|_| ApiResponsePayload::Empty); 2099 sender.send(response).map_err(Error::ApiResponseSend)?; 2100 } 2101 ApiRequest::VmAddDisk(add_disk_data, sender) => { 2102 let response = self 2103 .vm_add_disk(add_disk_data.as_ref().clone()) 2104 .map_err(ApiError::VmAddDisk) 2105 .map(ApiResponsePayload::VmAction); 2106 sender.send(response).map_err(Error::ApiResponseSend)?; 2107 } 2108 ApiRequest::VmAddFs(add_fs_data, sender) => { 2109 let response = self 2110 .vm_add_fs(add_fs_data.as_ref().clone()) 2111 .map_err(ApiError::VmAddFs) 2112 .map(ApiResponsePayload::VmAction); 2113 sender.send(response).map_err(Error::ApiResponseSend)?; 2114 } 2115 ApiRequest::VmAddPmem(add_pmem_data, sender) => { 2116 let response = self 2117 .vm_add_pmem(add_pmem_data.as_ref().clone()) 2118 .map_err(ApiError::VmAddPmem) 2119 .map(ApiResponsePayload::VmAction); 2120 sender.send(response).map_err(Error::ApiResponseSend)?; 2121 } 2122 ApiRequest::VmAddNet(add_net_data, sender) => { 2123 let response = self 2124 .vm_add_net(add_net_data.as_ref().clone()) 2125 .map_err(ApiError::VmAddNet) 2126 .map(ApiResponsePayload::VmAction); 2127 sender.send(response).map_err(Error::ApiResponseSend)?; 2128 } 2129 ApiRequest::VmAddVdpa(add_vdpa_data, sender) => { 2130 let response = self 2131 .vm_add_vdpa(add_vdpa_data.as_ref().clone()) 2132 .map_err(ApiError::VmAddVdpa) 2133 .map(ApiResponsePayload::VmAction); 2134 sender.send(response).map_err(Error::ApiResponseSend)?; 2135 } 2136 ApiRequest::VmAddVsock(add_vsock_data, sender) => { 2137 let response = self 2138 .vm_add_vsock(add_vsock_data.as_ref().clone()) 2139 .map_err(ApiError::VmAddVsock) 2140 .map(ApiResponsePayload::VmAction); 2141 sender.send(response).map_err(Error::ApiResponseSend)?; 2142 } 2143 ApiRequest::VmCounters(sender) => { 2144 let response = self 2145 .vm_counters() 2146 .map_err(ApiError::VmInfo) 2147 .map(ApiResponsePayload::VmAction); 2148 sender.send(response).map_err(Error::ApiResponseSend)?; 2149 } 2150 ApiRequest::VmReceiveMigration(receive_migration_data, sender) => { 2151 let response = self 2152 .vm_receive_migration( 2153 receive_migration_data.as_ref().clone(), 2154 ) 2155 .map_err(ApiError::VmReceiveMigration) 2156 .map(|_| ApiResponsePayload::Empty); 2157 sender.send(response).map_err(Error::ApiResponseSend)?; 2158 } 2159 ApiRequest::VmSendMigration(send_migration_data, sender) => { 2160 let response = self 2161 .vm_send_migration(send_migration_data.as_ref().clone()) 2162 .map_err(ApiError::VmSendMigration) 2163 .map(|_| ApiResponsePayload::Empty); 2164 sender.send(response).map_err(Error::ApiResponseSend)?; 2165 } 2166 ApiRequest::VmPowerButton(sender) => { 2167 let response = self 2168 .vm_power_button() 2169 .map_err(ApiError::VmPowerButton) 2170 .map(|_| ApiResponsePayload::Empty); 2171 2172 sender.send(response).map_err(Error::ApiResponseSend)?; 2173 } 2174 } 2175 } 2176 } 2177 #[cfg(feature = "guest_debug")] 2178 EpollDispatch::Debug => { 2179 // Consume the events. 2180 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 2181 // Read from the API receiver channel 2182 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 2183 2184 let response = if let Some(ref mut vm) = self.vm { 2185 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 2186 } else { 2187 Err(VmError::VmNotRunning) 2188 } 2189 .map_err(gdb::Error::Vm); 2190 2191 gdb_request 2192 .sender 2193 .send(response) 2194 .map_err(Error::GdbResponseSend)?; 2195 } 2196 } 2197 #[cfg(not(feature = "guest_debug"))] 2198 EpollDispatch::Debug => {} 2199 } 2200 } 2201 } 2202 2203 // Trigger the termination of the signal_handler thread 2204 if let Some(signals) = self.signals.take() { 2205 signals.close(); 2206 } 2207 2208 // Wait for all the threads to finish 2209 for thread in self.threads.drain(..) { 2210 thread.join().map_err(Error::ThreadCleanup)? 2211 } 2212 2213 Ok(()) 2214 } 2215 } 2216 2217 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2218 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2219 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2220 2221 #[cfg(test)] 2222 mod unit_tests { 2223 use super::*; 2224 use config::{ 2225 ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig, 2226 RngConfig, VmConfig, 2227 }; 2228 2229 fn create_dummy_vmm() -> Vmm { 2230 Vmm::new( 2231 VmmVersionInfo::new("dummy", "dummy"), 2232 EventFd::new(EFD_NONBLOCK).unwrap(), 2233 #[cfg(feature = "guest_debug")] 2234 EventFd::new(EFD_NONBLOCK).unwrap(), 2235 #[cfg(feature = "guest_debug")] 2236 EventFd::new(EFD_NONBLOCK).unwrap(), 2237 SeccompAction::Allow, 2238 hypervisor::new().unwrap(), 2239 EventFd::new(EFD_NONBLOCK).unwrap(), 2240 ) 2241 .unwrap() 2242 } 2243 2244 fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> { 2245 Arc::new(Mutex::new(VmConfig { 2246 cpus: CpusConfig { 2247 boot_vcpus: 1, 2248 max_vcpus: 1, 2249 topology: None, 2250 kvm_hyperv: false, 2251 max_phys_bits: 46, 2252 affinity: None, 2253 features: config::CpuFeatures::default(), 2254 }, 2255 memory: MemoryConfig { 2256 size: 536_870_912, 2257 mergeable: false, 2258 hotplug_method: HotplugMethod::Acpi, 2259 hotplug_size: None, 2260 hotplugged_size: None, 2261 shared: true, 2262 hugepages: false, 2263 hugepage_size: None, 2264 prefault: false, 2265 zones: None, 2266 thp: true, 2267 }, 2268 payload: Some(PayloadConfig { 2269 kernel: Some(PathBuf::from("/path/to/kernel")), 2270 ..Default::default() 2271 }), 2272 disks: None, 2273 net: None, 2274 rng: RngConfig { 2275 src: PathBuf::from("/dev/urandom"), 2276 iommu: false, 2277 }, 2278 balloon: None, 2279 fs: None, 2280 pmem: None, 2281 serial: ConsoleConfig { 2282 file: None, 2283 mode: ConsoleOutputMode::Null, 2284 iommu: false, 2285 socket: None, 2286 }, 2287 console: ConsoleConfig { 2288 file: None, 2289 mode: ConsoleOutputMode::Tty, 2290 iommu: false, 2291 socket: None, 2292 }, 2293 devices: None, 2294 user_devices: None, 2295 vdpa: None, 2296 vsock: None, 2297 pvpanic: false, 2298 iommu: false, 2299 #[cfg(target_arch = "x86_64")] 2300 sgx_epc: None, 2301 numa: None, 2302 watchdog: false, 2303 #[cfg(feature = "guest_debug")] 2304 gdb: false, 2305 platform: None, 2306 tpm: None, 2307 preserved_fds: None, 2308 })) 2309 } 2310 2311 #[test] 2312 fn test_vmm_vm_create() { 2313 let mut vmm = create_dummy_vmm(); 2314 let config = create_dummy_vm_config(); 2315 2316 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2317 assert!(matches!( 2318 vmm.vm_create(config), 2319 Err(VmError::VmAlreadyCreated) 2320 )); 2321 } 2322 2323 #[test] 2324 fn test_vmm_vm_cold_add_device() { 2325 let mut vmm = create_dummy_vmm(); 2326 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2327 2328 assert!(matches!( 2329 vmm.vm_add_device(device_config.clone()), 2330 Err(VmError::VmNotCreated) 2331 )); 2332 2333 let _ = vmm.vm_create(create_dummy_vm_config()); 2334 assert!(vmm 2335 .vm_config 2336 .as_ref() 2337 .unwrap() 2338 .lock() 2339 .unwrap() 2340 .devices 2341 .is_none()); 2342 2343 let result = vmm.vm_add_device(device_config.clone()); 2344 assert!(result.is_ok()); 2345 assert!(result.unwrap().is_none()); 2346 assert_eq!( 2347 vmm.vm_config 2348 .as_ref() 2349 .unwrap() 2350 .lock() 2351 .unwrap() 2352 .devices 2353 .clone() 2354 .unwrap() 2355 .len(), 2356 1 2357 ); 2358 assert_eq!( 2359 vmm.vm_config 2360 .as_ref() 2361 .unwrap() 2362 .lock() 2363 .unwrap() 2364 .devices 2365 .clone() 2366 .unwrap()[0], 2367 device_config 2368 ); 2369 } 2370 2371 #[test] 2372 fn test_vmm_vm_cold_add_user_device() { 2373 let mut vmm = create_dummy_vmm(); 2374 let user_device_config = 2375 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2376 2377 assert!(matches!( 2378 vmm.vm_add_user_device(user_device_config.clone()), 2379 Err(VmError::VmNotCreated) 2380 )); 2381 2382 let _ = vmm.vm_create(create_dummy_vm_config()); 2383 assert!(vmm 2384 .vm_config 2385 .as_ref() 2386 .unwrap() 2387 .lock() 2388 .unwrap() 2389 .user_devices 2390 .is_none()); 2391 2392 let result = vmm.vm_add_user_device(user_device_config.clone()); 2393 assert!(result.is_ok()); 2394 assert!(result.unwrap().is_none()); 2395 assert_eq!( 2396 vmm.vm_config 2397 .as_ref() 2398 .unwrap() 2399 .lock() 2400 .unwrap() 2401 .user_devices 2402 .clone() 2403 .unwrap() 2404 .len(), 2405 1 2406 ); 2407 assert_eq!( 2408 vmm.vm_config 2409 .as_ref() 2410 .unwrap() 2411 .lock() 2412 .unwrap() 2413 .user_devices 2414 .clone() 2415 .unwrap()[0], 2416 user_device_config 2417 ); 2418 } 2419 2420 #[test] 2421 fn test_vmm_vm_cold_add_disk() { 2422 let mut vmm = create_dummy_vmm(); 2423 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2424 2425 assert!(matches!( 2426 vmm.vm_add_disk(disk_config.clone()), 2427 Err(VmError::VmNotCreated) 2428 )); 2429 2430 let _ = vmm.vm_create(create_dummy_vm_config()); 2431 assert!(vmm 2432 .vm_config 2433 .as_ref() 2434 .unwrap() 2435 .lock() 2436 .unwrap() 2437 .disks 2438 .is_none()); 2439 2440 let result = vmm.vm_add_disk(disk_config.clone()); 2441 assert!(result.is_ok()); 2442 assert!(result.unwrap().is_none()); 2443 assert_eq!( 2444 vmm.vm_config 2445 .as_ref() 2446 .unwrap() 2447 .lock() 2448 .unwrap() 2449 .disks 2450 .clone() 2451 .unwrap() 2452 .len(), 2453 1 2454 ); 2455 assert_eq!( 2456 vmm.vm_config 2457 .as_ref() 2458 .unwrap() 2459 .lock() 2460 .unwrap() 2461 .disks 2462 .clone() 2463 .unwrap()[0], 2464 disk_config 2465 ); 2466 } 2467 2468 #[test] 2469 fn test_vmm_vm_cold_add_fs() { 2470 let mut vmm = create_dummy_vmm(); 2471 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2472 2473 assert!(matches!( 2474 vmm.vm_add_fs(fs_config.clone()), 2475 Err(VmError::VmNotCreated) 2476 )); 2477 2478 let _ = vmm.vm_create(create_dummy_vm_config()); 2479 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2480 2481 let result = vmm.vm_add_fs(fs_config.clone()); 2482 assert!(result.is_ok()); 2483 assert!(result.unwrap().is_none()); 2484 assert_eq!( 2485 vmm.vm_config 2486 .as_ref() 2487 .unwrap() 2488 .lock() 2489 .unwrap() 2490 .fs 2491 .clone() 2492 .unwrap() 2493 .len(), 2494 1 2495 ); 2496 assert_eq!( 2497 vmm.vm_config 2498 .as_ref() 2499 .unwrap() 2500 .lock() 2501 .unwrap() 2502 .fs 2503 .clone() 2504 .unwrap()[0], 2505 fs_config 2506 ); 2507 } 2508 2509 #[test] 2510 fn test_vmm_vm_cold_add_pmem() { 2511 let mut vmm = create_dummy_vmm(); 2512 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2513 2514 assert!(matches!( 2515 vmm.vm_add_pmem(pmem_config.clone()), 2516 Err(VmError::VmNotCreated) 2517 )); 2518 2519 let _ = vmm.vm_create(create_dummy_vm_config()); 2520 assert!(vmm 2521 .vm_config 2522 .as_ref() 2523 .unwrap() 2524 .lock() 2525 .unwrap() 2526 .pmem 2527 .is_none()); 2528 2529 let result = vmm.vm_add_pmem(pmem_config.clone()); 2530 assert!(result.is_ok()); 2531 assert!(result.unwrap().is_none()); 2532 assert_eq!( 2533 vmm.vm_config 2534 .as_ref() 2535 .unwrap() 2536 .lock() 2537 .unwrap() 2538 .pmem 2539 .clone() 2540 .unwrap() 2541 .len(), 2542 1 2543 ); 2544 assert_eq!( 2545 vmm.vm_config 2546 .as_ref() 2547 .unwrap() 2548 .lock() 2549 .unwrap() 2550 .pmem 2551 .clone() 2552 .unwrap()[0], 2553 pmem_config 2554 ); 2555 } 2556 2557 #[test] 2558 fn test_vmm_vm_cold_add_net() { 2559 let mut vmm = create_dummy_vmm(); 2560 let net_config = NetConfig::parse( 2561 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2562 ) 2563 .unwrap(); 2564 2565 assert!(matches!( 2566 vmm.vm_add_net(net_config.clone()), 2567 Err(VmError::VmNotCreated) 2568 )); 2569 2570 let _ = vmm.vm_create(create_dummy_vm_config()); 2571 assert!(vmm 2572 .vm_config 2573 .as_ref() 2574 .unwrap() 2575 .lock() 2576 .unwrap() 2577 .net 2578 .is_none()); 2579 2580 let result = vmm.vm_add_net(net_config.clone()); 2581 assert!(result.is_ok()); 2582 assert!(result.unwrap().is_none()); 2583 assert_eq!( 2584 vmm.vm_config 2585 .as_ref() 2586 .unwrap() 2587 .lock() 2588 .unwrap() 2589 .net 2590 .clone() 2591 .unwrap() 2592 .len(), 2593 1 2594 ); 2595 assert_eq!( 2596 vmm.vm_config 2597 .as_ref() 2598 .unwrap() 2599 .lock() 2600 .unwrap() 2601 .net 2602 .clone() 2603 .unwrap()[0], 2604 net_config 2605 ); 2606 } 2607 2608 #[test] 2609 fn test_vmm_vm_cold_add_vdpa() { 2610 let mut vmm = create_dummy_vmm(); 2611 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2612 2613 assert!(matches!( 2614 vmm.vm_add_vdpa(vdpa_config.clone()), 2615 Err(VmError::VmNotCreated) 2616 )); 2617 2618 let _ = vmm.vm_create(create_dummy_vm_config()); 2619 assert!(vmm 2620 .vm_config 2621 .as_ref() 2622 .unwrap() 2623 .lock() 2624 .unwrap() 2625 .vdpa 2626 .is_none()); 2627 2628 let result = vmm.vm_add_vdpa(vdpa_config.clone()); 2629 assert!(result.is_ok()); 2630 assert!(result.unwrap().is_none()); 2631 assert_eq!( 2632 vmm.vm_config 2633 .as_ref() 2634 .unwrap() 2635 .lock() 2636 .unwrap() 2637 .vdpa 2638 .clone() 2639 .unwrap() 2640 .len(), 2641 1 2642 ); 2643 assert_eq!( 2644 vmm.vm_config 2645 .as_ref() 2646 .unwrap() 2647 .lock() 2648 .unwrap() 2649 .vdpa 2650 .clone() 2651 .unwrap()[0], 2652 vdpa_config 2653 ); 2654 } 2655 2656 #[test] 2657 fn test_vmm_vm_cold_add_vsock() { 2658 let mut vmm = create_dummy_vmm(); 2659 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=1,iommu=on").unwrap(); 2660 2661 assert!(matches!( 2662 vmm.vm_add_vsock(vsock_config.clone()), 2663 Err(VmError::VmNotCreated) 2664 )); 2665 2666 let _ = vmm.vm_create(create_dummy_vm_config()); 2667 assert!(vmm 2668 .vm_config 2669 .as_ref() 2670 .unwrap() 2671 .lock() 2672 .unwrap() 2673 .vsock 2674 .is_none()); 2675 2676 let result = vmm.vm_add_vsock(vsock_config.clone()); 2677 assert!(result.is_ok()); 2678 assert!(result.unwrap().is_none()); 2679 assert_eq!( 2680 vmm.vm_config 2681 .as_ref() 2682 .unwrap() 2683 .lock() 2684 .unwrap() 2685 .vsock 2686 .clone() 2687 .unwrap(), 2688 vsock_config 2689 ); 2690 } 2691 } 2692