1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use crate::api::{ 12 ApiError, ApiRequest, ApiResponse, ApiResponsePayload, VmInfo, VmReceiveMigrationData, 13 VmSendMigrationData, VmmPingResponse, 14 }; 15 use crate::config::{ 16 add_to_config, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, RestoreConfig, 17 UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, 18 }; 19 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 20 use crate::coredump::GuestDebuggable; 21 use crate::memory_manager::MemoryManager; 22 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 23 use crate::migration::get_vm_snapshot; 24 use crate::migration::{recv_vm_config, recv_vm_state}; 25 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 26 use crate::vm::{Error as VmError, Vm, VmState}; 27 use anyhow::anyhow; 28 #[cfg(feature = "dbus_api")] 29 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 31 use memory_manager::MemoryManagerSnapshotData; 32 use pci::PciBdf; 33 use seccompiler::{apply_filter, SeccompAction}; 34 use serde::ser::{SerializeStruct, Serializer}; 35 use serde::{Deserialize, Serialize}; 36 use signal_hook::iterator::{Handle, Signals}; 37 use std::collections::HashMap; 38 use std::fs::File; 39 use std::io; 40 use std::io::{stdout, Read, Write}; 41 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 42 use std::os::unix::net::UnixListener; 43 use std::os::unix::net::UnixStream; 44 use std::panic::AssertUnwindSafe; 45 use std::path::PathBuf; 46 use std::rc::Rc; 47 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 48 use std::sync::{Arc, Mutex}; 49 use std::time::Instant; 50 use std::{result, thread}; 51 use thiserror::Error; 52 use tracer::trace_scoped; 53 use vm_memory::bitmap::AtomicBitmap; 54 use vm_migration::{protocol::*, Migratable}; 55 use vm_migration::{MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 56 use vmm_sys_util::eventfd::EventFd; 57 use vmm_sys_util::signal::unblock_signal; 58 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 59 60 mod acpi; 61 pub mod api; 62 mod clone3; 63 pub mod config; 64 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 65 mod coredump; 66 pub mod cpu; 67 pub mod device_manager; 68 pub mod device_tree; 69 #[cfg(feature = "guest_debug")] 70 mod gdb; 71 pub mod interrupt; 72 pub mod memory_manager; 73 pub mod migration; 74 mod pci_segment; 75 pub mod seccomp_filters; 76 mod serial_manager; 77 mod sigwinch_listener; 78 pub mod vm; 79 pub mod vm_config; 80 81 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 82 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 83 84 /// Errors associated with VMM management 85 #[derive(Debug, Error)] 86 pub enum Error { 87 /// API request receive error 88 #[error("Error receiving API request: {0}")] 89 ApiRequestRecv(#[source] RecvError), 90 91 /// API response send error 92 #[error("Error sending API request: {0}")] 93 ApiResponseSend(#[source] SendError<ApiResponse>), 94 95 /// Cannot bind to the UNIX domain socket path 96 #[error("Error binding to UNIX domain socket: {0}")] 97 Bind(#[source] io::Error), 98 99 /// Cannot clone EventFd. 100 #[error("Error cloning EventFd: {0}")] 101 EventFdClone(#[source] io::Error), 102 103 /// Cannot create EventFd. 104 #[error("Error creating EventFd: {0}")] 105 EventFdCreate(#[source] io::Error), 106 107 /// Cannot read from EventFd. 108 #[error("Error reading from EventFd: {0}")] 109 EventFdRead(#[source] io::Error), 110 111 /// Cannot create epoll context. 112 #[error("Error creating epoll context: {0}")] 113 Epoll(#[source] io::Error), 114 115 /// Cannot create HTTP thread 116 #[error("Error spawning HTTP thread: {0}")] 117 HttpThreadSpawn(#[source] io::Error), 118 119 /// Cannot create D-Bus thread 120 #[cfg(feature = "dbus_api")] 121 #[error("Error spawning D-Bus thread: {0}")] 122 DBusThreadSpawn(#[source] io::Error), 123 124 /// Cannot start D-Bus session 125 #[cfg(feature = "dbus_api")] 126 #[error("Error starting D-Bus session: {0}")] 127 CreateDBusSession(#[source] zbus::Error), 128 129 /// Cannot create `event-monitor` thread 130 #[error("Error spawning `event-monitor` thread: {0}")] 131 EventMonitorThreadSpawn(#[source] io::Error), 132 133 /// Cannot handle the VM STDIN stream 134 #[error("Error handling VM stdin: {0:?}")] 135 Stdin(VmError), 136 137 /// Cannot handle the VM pty stream 138 #[error("Error handling VM pty: {0:?}")] 139 Pty(VmError), 140 141 /// Cannot reboot the VM 142 #[error("Error rebooting VM: {0:?}")] 143 VmReboot(VmError), 144 145 /// Cannot create VMM thread 146 #[error("Error spawning VMM thread {0:?}")] 147 VmmThreadSpawn(#[source] io::Error), 148 149 /// Cannot shut the VMM down 150 #[error("Error shutting down VMM: {0:?}")] 151 VmmShutdown(VmError), 152 153 /// Cannot create seccomp filter 154 #[error("Error creating seccomp filter: {0}")] 155 CreateSeccompFilter(seccompiler::Error), 156 157 /// Cannot apply seccomp filter 158 #[error("Error applying seccomp filter: {0}")] 159 ApplySeccompFilter(seccompiler::Error), 160 161 /// Error activating virtio devices 162 #[error("Error activating virtio devices: {0:?}")] 163 ActivateVirtioDevices(VmError), 164 165 /// Error creating API server 166 #[error("Error creating API server {0:?}")] 167 CreateApiServer(micro_http::ServerError), 168 169 /// Error binding API server socket 170 #[error("Error creation API server's socket {0:?}")] 171 CreateApiServerSocket(#[source] io::Error), 172 173 #[cfg(feature = "guest_debug")] 174 #[error("Failed to start the GDB thread: {0}")] 175 GdbThreadSpawn(io::Error), 176 177 /// GDB request receive error 178 #[cfg(feature = "guest_debug")] 179 #[error("Error receiving GDB request: {0}")] 180 GdbRequestRecv(#[source] RecvError), 181 182 /// GDB response send error 183 #[cfg(feature = "guest_debug")] 184 #[error("Error sending GDB request: {0}")] 185 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 186 187 #[error("Cannot spawn a signal handler thread: {0}")] 188 SignalHandlerSpawn(#[source] io::Error), 189 190 #[error("Failed to join on threads: {0:?}")] 191 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 192 } 193 pub type Result<T> = result::Result<T, Error>; 194 195 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 196 #[repr(u64)] 197 pub enum EpollDispatch { 198 Exit = 0, 199 Reset = 1, 200 Api = 2, 201 ActivateVirtioDevices = 3, 202 Debug = 4, 203 Unknown, 204 } 205 206 impl From<u64> for EpollDispatch { 207 fn from(v: u64) -> Self { 208 use EpollDispatch::*; 209 match v { 210 0 => Exit, 211 1 => Reset, 212 2 => Api, 213 3 => ActivateVirtioDevices, 214 4 => Debug, 215 _ => Unknown, 216 } 217 } 218 } 219 220 pub struct EpollContext { 221 epoll_file: File, 222 } 223 224 impl EpollContext { 225 pub fn new() -> result::Result<EpollContext, io::Error> { 226 let epoll_fd = epoll::create(true)?; 227 // Use 'File' to enforce closing on 'epoll_fd' 228 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 229 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 230 231 Ok(EpollContext { epoll_file }) 232 } 233 234 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 235 where 236 T: AsRawFd, 237 { 238 let dispatch_index = token as u64; 239 epoll::ctl( 240 self.epoll_file.as_raw_fd(), 241 epoll::ControlOptions::EPOLL_CTL_ADD, 242 fd.as_raw_fd(), 243 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 244 )?; 245 246 Ok(()) 247 } 248 249 #[cfg(fuzzing)] 250 pub fn add_event_custom<T>( 251 &mut self, 252 fd: &T, 253 id: u64, 254 evts: epoll::Events, 255 ) -> result::Result<(), io::Error> 256 where 257 T: AsRawFd, 258 { 259 epoll::ctl( 260 self.epoll_file.as_raw_fd(), 261 epoll::ControlOptions::EPOLL_CTL_ADD, 262 fd.as_raw_fd(), 263 epoll::Event::new(evts, id), 264 )?; 265 266 Ok(()) 267 } 268 } 269 270 impl AsRawFd for EpollContext { 271 fn as_raw_fd(&self) -> RawFd { 272 self.epoll_file.as_raw_fd() 273 } 274 } 275 276 pub struct PciDeviceInfo { 277 pub id: String, 278 pub bdf: PciBdf, 279 } 280 281 impl Serialize for PciDeviceInfo { 282 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 283 where 284 S: Serializer, 285 { 286 let bdf_str = self.bdf.to_string(); 287 288 // Serialize the structure. 289 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 290 state.serialize_field("id", &self.id)?; 291 state.serialize_field("bdf", &bdf_str)?; 292 state.end() 293 } 294 } 295 296 pub fn feature_list() -> Vec<String> { 297 vec![ 298 #[cfg(feature = "dbus_api")] 299 "dbus_api".to_string(), 300 #[cfg(feature = "dhat-heap")] 301 "dhat-heap".to_string(), 302 #[cfg(feature = "guest_debug")] 303 "guest_debug".to_string(), 304 #[cfg(feature = "io_uring")] 305 "io_uring".to_string(), 306 #[cfg(feature = "kvm")] 307 "kvm".to_string(), 308 #[cfg(feature = "mshv")] 309 "mshv".to_string(), 310 #[cfg(feature = "sev_snp")] 311 "sev_snp".to_string(), 312 #[cfg(feature = "tdx")] 313 "tdx".to_string(), 314 #[cfg(feature = "tracing")] 315 "tracing".to_string(), 316 ] 317 } 318 319 pub fn start_event_monitor_thread( 320 mut monitor: event_monitor::Monitor, 321 seccomp_action: &SeccompAction, 322 hypervisor_type: hypervisor::HypervisorType, 323 exit_event: EventFd, 324 ) -> Result<thread::JoinHandle<Result<()>>> { 325 // Retrieve seccomp filter 326 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 327 .map_err(Error::CreateSeccompFilter)?; 328 329 thread::Builder::new() 330 .name("event-monitor".to_owned()) 331 .spawn(move || { 332 // Apply seccomp filter 333 if !seccomp_filter.is_empty() { 334 apply_filter(&seccomp_filter) 335 .map_err(Error::ApplySeccompFilter) 336 .map_err(|e| { 337 error!("Error applying seccomp filter: {:?}", e); 338 exit_event.write(1).ok(); 339 e 340 })?; 341 } 342 343 std::panic::catch_unwind(AssertUnwindSafe(move || { 344 while let Ok(event) = monitor.rx.recv() { 345 let event = Arc::new(event); 346 347 if let Some(ref mut file) = monitor.file { 348 file.write_all(event.as_bytes().as_ref()).ok(); 349 file.write_all(b"\n\n").ok(); 350 } 351 352 for tx in monitor.broadcast.iter() { 353 tx.send(event.clone()).ok(); 354 } 355 } 356 })) 357 .map_err(|_| { 358 error!("`event-monitor` thread panicked"); 359 exit_event.write(1).ok(); 360 }) 361 .ok(); 362 363 Ok(()) 364 }) 365 .map_err(Error::EventMonitorThreadSpawn) 366 } 367 368 #[allow(unused_variables)] 369 #[allow(clippy::too_many_arguments)] 370 pub fn start_vmm_thread( 371 vmm_version: VmmVersionInfo, 372 http_path: &Option<String>, 373 http_fd: Option<RawFd>, 374 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 375 api_event: EventFd, 376 api_sender: Sender<ApiRequest>, 377 api_receiver: Receiver<ApiRequest>, 378 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 379 #[cfg(feature = "guest_debug")] debug_event: EventFd, 380 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 381 exit_event: EventFd, 382 seccomp_action: &SeccompAction, 383 hypervisor: Arc<dyn hypervisor::Hypervisor>, 384 ) -> Result<VmmThreadHandle> { 385 #[cfg(feature = "guest_debug")] 386 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 387 #[cfg(feature = "guest_debug")] 388 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 389 #[cfg(feature = "guest_debug")] 390 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 391 #[cfg(feature = "guest_debug")] 392 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 393 394 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 395 let hypervisor_type = hypervisor.hypervisor_type(); 396 397 // Retrieve seccomp filter 398 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 399 .map_err(Error::CreateSeccompFilter)?; 400 401 let vmm_seccomp_action = seccomp_action.clone(); 402 let thread = { 403 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 404 thread::Builder::new() 405 .name("vmm".to_string()) 406 .spawn(move || { 407 // Apply seccomp filter for VMM thread. 408 if !vmm_seccomp_filter.is_empty() { 409 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 410 } 411 412 let mut vmm = Vmm::new( 413 vmm_version, 414 api_event, 415 #[cfg(feature = "guest_debug")] 416 debug_event, 417 #[cfg(feature = "guest_debug")] 418 vm_debug_event, 419 vmm_seccomp_action, 420 hypervisor, 421 exit_event, 422 )?; 423 424 vmm.setup_signal_handler()?; 425 426 vmm.control_loop( 427 Rc::new(api_receiver), 428 #[cfg(feature = "guest_debug")] 429 Rc::new(gdb_receiver), 430 ) 431 }) 432 .map_err(Error::VmmThreadSpawn)? 433 }; 434 435 // The VMM thread is started, we can start the dbus thread 436 // and start serving HTTP requests 437 #[cfg(feature = "dbus_api")] 438 let dbus_shutdown_chs = match dbus_options { 439 Some(opts) => { 440 let (_, chs) = api::start_dbus_thread( 441 opts, 442 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 443 api_sender.clone(), 444 seccomp_action, 445 exit_event.try_clone().map_err(Error::EventFdClone)?, 446 hypervisor_type, 447 )?; 448 Some(chs) 449 } 450 None => None, 451 }; 452 453 if let Some(http_path) = http_path { 454 api::start_http_path_thread( 455 http_path, 456 api_event_clone, 457 api_sender, 458 seccomp_action, 459 exit_event, 460 hypervisor_type, 461 )?; 462 } else if let Some(http_fd) = http_fd { 463 api::start_http_fd_thread( 464 http_fd, 465 api_event_clone, 466 api_sender, 467 seccomp_action, 468 exit_event, 469 hypervisor_type, 470 )?; 471 } 472 473 #[cfg(feature = "guest_debug")] 474 if let Some(debug_path) = debug_path { 475 let target = gdb::GdbStub::new( 476 gdb_sender, 477 gdb_debug_event, 478 gdb_vm_debug_event, 479 gdb_hw_breakpoints, 480 ); 481 thread::Builder::new() 482 .name("gdb".to_owned()) 483 .spawn(move || gdb::gdb_thread(target, &debug_path)) 484 .map_err(Error::GdbThreadSpawn)?; 485 } 486 487 Ok(VmmThreadHandle { 488 thread_handle: thread, 489 #[cfg(feature = "dbus_api")] 490 dbus_shutdown_chs, 491 }) 492 } 493 494 #[derive(Clone, Deserialize, Serialize)] 495 struct VmMigrationConfig { 496 vm_config: Arc<Mutex<VmConfig>>, 497 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 498 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 499 memory_manager_data: MemoryManagerSnapshotData, 500 } 501 502 #[derive(Debug, Clone)] 503 pub struct VmmVersionInfo { 504 pub build_version: String, 505 pub version: String, 506 } 507 508 impl VmmVersionInfo { 509 pub fn new(build_version: &str, version: &str) -> Self { 510 Self { 511 build_version: build_version.to_owned(), 512 version: version.to_owned(), 513 } 514 } 515 } 516 517 pub struct VmmThreadHandle { 518 pub thread_handle: thread::JoinHandle<Result<()>>, 519 #[cfg(feature = "dbus_api")] 520 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 521 } 522 523 pub struct Vmm { 524 epoll: EpollContext, 525 exit_evt: EventFd, 526 reset_evt: EventFd, 527 api_evt: EventFd, 528 #[cfg(feature = "guest_debug")] 529 debug_evt: EventFd, 530 #[cfg(feature = "guest_debug")] 531 vm_debug_evt: EventFd, 532 version: VmmVersionInfo, 533 vm: Option<Vm>, 534 vm_config: Option<Arc<Mutex<VmConfig>>>, 535 seccomp_action: SeccompAction, 536 hypervisor: Arc<dyn hypervisor::Hypervisor>, 537 activate_evt: EventFd, 538 signals: Option<Handle>, 539 threads: Vec<thread::JoinHandle<()>>, 540 original_termios_opt: Arc<Mutex<Option<termios>>>, 541 } 542 543 impl Vmm { 544 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 545 546 fn signal_handler( 547 mut signals: Signals, 548 original_termios_opt: Arc<Mutex<Option<termios>>>, 549 exit_evt: &EventFd, 550 ) { 551 for sig in &Self::HANDLED_SIGNALS { 552 unblock_signal(*sig).unwrap(); 553 } 554 555 for signal in signals.forever() { 556 match signal { 557 SIGTERM | SIGINT => { 558 if exit_evt.write(1).is_err() { 559 // Resetting the terminal is usually done as the VMM exits 560 if let Ok(lock) = original_termios_opt.lock() { 561 if let Some(termios) = *lock { 562 // SAFETY: FFI call 563 let _ = unsafe { 564 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 565 }; 566 } 567 } else { 568 warn!("Failed to lock original termios"); 569 } 570 571 std::process::exit(1); 572 } 573 } 574 _ => (), 575 } 576 } 577 } 578 579 fn setup_signal_handler(&mut self) -> Result<()> { 580 let signals = Signals::new(Self::HANDLED_SIGNALS); 581 match signals { 582 Ok(signals) => { 583 self.signals = Some(signals.handle()); 584 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 585 let original_termios_opt = Arc::clone(&self.original_termios_opt); 586 587 let signal_handler_seccomp_filter = get_seccomp_filter( 588 &self.seccomp_action, 589 Thread::SignalHandler, 590 self.hypervisor.hypervisor_type(), 591 ) 592 .map_err(Error::CreateSeccompFilter)?; 593 self.threads.push( 594 thread::Builder::new() 595 .name("vmm_signal_handler".to_string()) 596 .spawn(move || { 597 if !signal_handler_seccomp_filter.is_empty() { 598 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 599 .map_err(Error::ApplySeccompFilter) 600 { 601 error!("Error applying seccomp filter: {:?}", e); 602 exit_evt.write(1).ok(); 603 return; 604 } 605 } 606 std::panic::catch_unwind(AssertUnwindSafe(|| { 607 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 608 })) 609 .map_err(|_| { 610 error!("vmm signal_handler thread panicked"); 611 exit_evt.write(1).ok() 612 }) 613 .ok(); 614 }) 615 .map_err(Error::SignalHandlerSpawn)?, 616 ); 617 } 618 Err(e) => error!("Signal not found {}", e), 619 } 620 Ok(()) 621 } 622 623 fn new( 624 vmm_version: VmmVersionInfo, 625 api_evt: EventFd, 626 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 627 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 628 seccomp_action: SeccompAction, 629 hypervisor: Arc<dyn hypervisor::Hypervisor>, 630 exit_evt: EventFd, 631 ) -> Result<Self> { 632 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 633 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 634 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 635 636 epoll 637 .add_event(&exit_evt, EpollDispatch::Exit) 638 .map_err(Error::Epoll)?; 639 640 epoll 641 .add_event(&reset_evt, EpollDispatch::Reset) 642 .map_err(Error::Epoll)?; 643 644 epoll 645 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 646 .map_err(Error::Epoll)?; 647 648 epoll 649 .add_event(&api_evt, EpollDispatch::Api) 650 .map_err(Error::Epoll)?; 651 652 #[cfg(feature = "guest_debug")] 653 epoll 654 .add_event(&debug_evt, EpollDispatch::Debug) 655 .map_err(Error::Epoll)?; 656 657 Ok(Vmm { 658 epoll, 659 exit_evt, 660 reset_evt, 661 api_evt, 662 #[cfg(feature = "guest_debug")] 663 debug_evt, 664 #[cfg(feature = "guest_debug")] 665 vm_debug_evt, 666 version: vmm_version, 667 vm: None, 668 vm_config: None, 669 seccomp_action, 670 hypervisor, 671 activate_evt, 672 signals: None, 673 threads: vec![], 674 original_termios_opt: Arc::new(Mutex::new(None)), 675 }) 676 } 677 678 fn vm_create(&mut self, config: Arc<Mutex<VmConfig>>) -> result::Result<(), VmError> { 679 // We only store the passed VM config. 680 // The VM will be created when being asked to boot it. 681 if self.vm_config.is_none() { 682 self.vm_config = Some(config); 683 Ok(()) 684 } else { 685 Err(VmError::VmAlreadyCreated) 686 } 687 } 688 689 fn vm_boot(&mut self) -> result::Result<(), VmError> { 690 tracer::start(); 691 let r = { 692 trace_scoped!("vm_boot"); 693 // If we don't have a config, we can not boot a VM. 694 if self.vm_config.is_none() { 695 return Err(VmError::VmMissingConfig); 696 }; 697 698 // Create a new VM if we don't have one yet. 699 if self.vm.is_none() { 700 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 701 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 702 #[cfg(feature = "guest_debug")] 703 let vm_debug_evt = self 704 .vm_debug_evt 705 .try_clone() 706 .map_err(VmError::EventFdClone)?; 707 let activate_evt = self 708 .activate_evt 709 .try_clone() 710 .map_err(VmError::EventFdClone)?; 711 712 if let Some(ref vm_config) = self.vm_config { 713 let vm = Vm::new( 714 Arc::clone(vm_config), 715 exit_evt, 716 reset_evt, 717 #[cfg(feature = "guest_debug")] 718 vm_debug_evt, 719 &self.seccomp_action, 720 self.hypervisor.clone(), 721 activate_evt, 722 None, 723 None, 724 None, 725 Arc::clone(&self.original_termios_opt), 726 None, 727 None, 728 None, 729 )?; 730 731 self.vm = Some(vm); 732 } 733 } 734 735 // Now we can boot the VM. 736 if let Some(ref mut vm) = self.vm { 737 vm.boot() 738 } else { 739 Err(VmError::VmNotCreated) 740 } 741 }; 742 tracer::end(); 743 r 744 } 745 746 fn vm_pause(&mut self) -> result::Result<(), VmError> { 747 if let Some(ref mut vm) = self.vm { 748 vm.pause().map_err(VmError::Pause) 749 } else { 750 Err(VmError::VmNotRunning) 751 } 752 } 753 754 fn vm_resume(&mut self) -> result::Result<(), VmError> { 755 if let Some(ref mut vm) = self.vm { 756 vm.resume().map_err(VmError::Resume) 757 } else { 758 Err(VmError::VmNotRunning) 759 } 760 } 761 762 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 763 if let Some(ref mut vm) = self.vm { 764 vm.snapshot() 765 .map_err(VmError::Snapshot) 766 .and_then(|snapshot| { 767 vm.send(&snapshot, destination_url) 768 .map_err(VmError::SnapshotSend) 769 }) 770 } else { 771 Err(VmError::VmNotRunning) 772 } 773 } 774 775 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 776 if self.vm.is_some() || self.vm_config.is_some() { 777 return Err(VmError::VmAlreadyCreated); 778 } 779 780 let source_url = restore_cfg.source_url.as_path().to_str(); 781 if source_url.is_none() { 782 return Err(VmError::InvalidRestoreSourceUrl); 783 } 784 // Safe to unwrap as we checked it was Some(&str). 785 let source_url = source_url.unwrap(); 786 787 let vm_config = Arc::new(Mutex::new( 788 recv_vm_config(source_url).map_err(VmError::Restore)?, 789 )); 790 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 791 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 792 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 793 794 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 795 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 796 .map_err(VmError::Restore)?; 797 798 self.vm_config = Some(Arc::clone(&vm_config)); 799 800 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 801 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 802 #[cfg(feature = "guest_debug")] 803 let debug_evt = self 804 .vm_debug_evt 805 .try_clone() 806 .map_err(VmError::EventFdClone)?; 807 let activate_evt = self 808 .activate_evt 809 .try_clone() 810 .map_err(VmError::EventFdClone)?; 811 812 let vm = Vm::new( 813 vm_config, 814 exit_evt, 815 reset_evt, 816 #[cfg(feature = "guest_debug")] 817 debug_evt, 818 &self.seccomp_action, 819 self.hypervisor.clone(), 820 activate_evt, 821 None, 822 None, 823 None, 824 Arc::clone(&self.original_termios_opt), 825 Some(snapshot), 826 Some(source_url), 827 Some(restore_cfg.prefault), 828 )?; 829 self.vm = Some(vm); 830 831 // Now we can restore the rest of the VM. 832 if let Some(ref mut vm) = self.vm { 833 vm.restore() 834 } else { 835 Err(VmError::VmNotCreated) 836 } 837 } 838 839 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 840 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 841 if let Some(ref mut vm) = self.vm { 842 vm.coredump(destination_url).map_err(VmError::Coredump) 843 } else { 844 Err(VmError::VmNotRunning) 845 } 846 } 847 848 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 849 if let Some(ref mut vm) = self.vm.take() { 850 vm.shutdown() 851 } else { 852 Err(VmError::VmNotRunning) 853 } 854 } 855 856 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 857 // First we stop the current VM 858 let (config, serial_pty, console_pty, console_resize_pipe) = 859 if let Some(mut vm) = self.vm.take() { 860 let config = vm.get_config(); 861 let serial_pty = vm.serial_pty(); 862 let console_pty = vm.console_pty(); 863 let console_resize_pipe = vm 864 .console_resize_pipe() 865 .as_ref() 866 .map(|pipe| pipe.try_clone().unwrap()); 867 vm.shutdown()?; 868 (config, serial_pty, console_pty, console_resize_pipe) 869 } else { 870 return Err(VmError::VmNotCreated); 871 }; 872 873 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 874 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 875 #[cfg(feature = "guest_debug")] 876 let debug_evt = self 877 .vm_debug_evt 878 .try_clone() 879 .map_err(VmError::EventFdClone)?; 880 let activate_evt = self 881 .activate_evt 882 .try_clone() 883 .map_err(VmError::EventFdClone)?; 884 885 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 886 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 887 // during the boot process. 888 if self.reset_evt.read().is_ok() { 889 warn!("Spurious second reset event received. Ignoring."); 890 } 891 892 // Then we create the new VM 893 let mut vm = Vm::new( 894 config, 895 exit_evt, 896 reset_evt, 897 #[cfg(feature = "guest_debug")] 898 debug_evt, 899 &self.seccomp_action, 900 self.hypervisor.clone(), 901 activate_evt, 902 serial_pty, 903 console_pty, 904 console_resize_pipe, 905 Arc::clone(&self.original_termios_opt), 906 None, 907 None, 908 None, 909 )?; 910 911 // And we boot it 912 vm.boot()?; 913 914 self.vm = Some(vm); 915 916 Ok(()) 917 } 918 919 fn vm_info(&self) -> result::Result<VmInfo, VmError> { 920 match &self.vm_config { 921 Some(config) => { 922 let state = match &self.vm { 923 Some(vm) => vm.get_state()?, 924 None => VmState::Created, 925 }; 926 927 let config = Arc::clone(config); 928 929 let mut memory_actual_size = config.lock().unwrap().memory.total_size(); 930 if let Some(vm) = &self.vm { 931 memory_actual_size -= vm.balloon_size(); 932 } 933 934 let device_tree = self.vm.as_ref().map(|vm| vm.device_tree()); 935 936 Ok(VmInfo { 937 config, 938 state, 939 memory_actual_size, 940 device_tree, 941 }) 942 } 943 None => Err(VmError::VmNotCreated), 944 } 945 } 946 947 fn vmm_ping(&self) -> VmmPingResponse { 948 let VmmVersionInfo { 949 build_version, 950 version, 951 } = self.version.clone(); 952 953 VmmPingResponse { 954 build_version, 955 version, 956 pid: std::process::id() as i64, 957 features: feature_list(), 958 } 959 } 960 961 fn vm_delete(&mut self) -> result::Result<(), VmError> { 962 if self.vm_config.is_none() { 963 return Ok(()); 964 } 965 966 // If a VM is booted, we first try to shut it down. 967 if self.vm.is_some() { 968 self.vm_shutdown()?; 969 } 970 971 self.vm_config = None; 972 973 event!("vm", "deleted"); 974 975 Ok(()) 976 } 977 978 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 979 self.vm_delete()?; 980 event!("vmm", "shutdown"); 981 Ok(()) 982 } 983 984 fn vm_resize( 985 &mut self, 986 desired_vcpus: Option<u8>, 987 desired_ram: Option<u64>, 988 desired_balloon: Option<u64>, 989 ) -> result::Result<(), VmError> { 990 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 991 992 if let Some(ref mut vm) = self.vm { 993 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 994 error!("Error when resizing VM: {:?}", e); 995 Err(e) 996 } else { 997 Ok(()) 998 } 999 } else { 1000 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1001 if let Some(desired_vcpus) = desired_vcpus { 1002 config.cpus.boot_vcpus = desired_vcpus; 1003 } 1004 if let Some(desired_ram) = desired_ram { 1005 config.memory.size = desired_ram; 1006 } 1007 if let Some(desired_balloon) = desired_balloon { 1008 if let Some(balloon_config) = &mut config.balloon { 1009 balloon_config.size = desired_balloon; 1010 } 1011 } 1012 Ok(()) 1013 } 1014 } 1015 1016 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1017 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1018 1019 if let Some(ref mut vm) = self.vm { 1020 if let Err(e) = vm.resize_zone(id, desired_ram) { 1021 error!("Error when resizing VM: {:?}", e); 1022 Err(e) 1023 } else { 1024 Ok(()) 1025 } 1026 } else { 1027 // Update VmConfig by setting the new desired ram. 1028 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1029 1030 if let Some(zones) = &mut memory_config.zones { 1031 for zone in zones.iter_mut() { 1032 if zone.id == id { 1033 zone.size = desired_ram; 1034 return Ok(()); 1035 } 1036 } 1037 } 1038 1039 error!("Could not find the memory zone {} for the resize", id); 1040 Err(VmError::ResizeZone) 1041 } 1042 } 1043 1044 fn vm_add_device( 1045 &mut self, 1046 device_cfg: DeviceConfig, 1047 ) -> result::Result<Option<Vec<u8>>, VmError> { 1048 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1049 1050 { 1051 // Validate the configuration change in a cloned configuration 1052 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1053 add_to_config(&mut config.devices, device_cfg.clone()); 1054 config.validate().map_err(VmError::ConfigValidation)?; 1055 } 1056 1057 if let Some(ref mut vm) = self.vm { 1058 let info = vm.add_device(device_cfg).map_err(|e| { 1059 error!("Error when adding new device to the VM: {:?}", e); 1060 e 1061 })?; 1062 serde_json::to_vec(&info) 1063 .map(Some) 1064 .map_err(VmError::SerializeJson) 1065 } else { 1066 // Update VmConfig by adding the new device. 1067 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1068 add_to_config(&mut config.devices, device_cfg); 1069 Ok(None) 1070 } 1071 } 1072 1073 fn vm_add_user_device( 1074 &mut self, 1075 device_cfg: UserDeviceConfig, 1076 ) -> result::Result<Option<Vec<u8>>, VmError> { 1077 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1078 1079 { 1080 // Validate the configuration change in a cloned configuration 1081 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1082 add_to_config(&mut config.user_devices, device_cfg.clone()); 1083 config.validate().map_err(VmError::ConfigValidation)?; 1084 } 1085 1086 if let Some(ref mut vm) = self.vm { 1087 let info = vm.add_user_device(device_cfg).map_err(|e| { 1088 error!("Error when adding new user device to the VM: {:?}", e); 1089 e 1090 })?; 1091 serde_json::to_vec(&info) 1092 .map(Some) 1093 .map_err(VmError::SerializeJson) 1094 } else { 1095 // Update VmConfig by adding the new device. 1096 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1097 add_to_config(&mut config.user_devices, device_cfg); 1098 Ok(None) 1099 } 1100 } 1101 1102 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1103 if let Some(ref mut vm) = self.vm { 1104 if let Err(e) = vm.remove_device(id) { 1105 error!("Error when removing device from the VM: {:?}", e); 1106 Err(e) 1107 } else { 1108 Ok(()) 1109 } 1110 } else if let Some(ref config) = self.vm_config { 1111 let mut config = config.lock().unwrap(); 1112 if config.remove_device(&id) { 1113 Ok(()) 1114 } else { 1115 Err(VmError::NoDeviceToRemove(id)) 1116 } 1117 } else { 1118 Err(VmError::VmNotCreated) 1119 } 1120 } 1121 1122 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1123 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1124 1125 { 1126 // Validate the configuration change in a cloned configuration 1127 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1128 add_to_config(&mut config.disks, disk_cfg.clone()); 1129 config.validate().map_err(VmError::ConfigValidation)?; 1130 } 1131 1132 if let Some(ref mut vm) = self.vm { 1133 let info = vm.add_disk(disk_cfg).map_err(|e| { 1134 error!("Error when adding new disk to the VM: {:?}", e); 1135 e 1136 })?; 1137 serde_json::to_vec(&info) 1138 .map(Some) 1139 .map_err(VmError::SerializeJson) 1140 } else { 1141 // Update VmConfig by adding the new device. 1142 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1143 add_to_config(&mut config.disks, disk_cfg); 1144 Ok(None) 1145 } 1146 } 1147 1148 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1149 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1150 1151 { 1152 // Validate the configuration change in a cloned configuration 1153 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1154 add_to_config(&mut config.fs, fs_cfg.clone()); 1155 config.validate().map_err(VmError::ConfigValidation)?; 1156 } 1157 1158 if let Some(ref mut vm) = self.vm { 1159 let info = vm.add_fs(fs_cfg).map_err(|e| { 1160 error!("Error when adding new fs to the VM: {:?}", e); 1161 e 1162 })?; 1163 serde_json::to_vec(&info) 1164 .map(Some) 1165 .map_err(VmError::SerializeJson) 1166 } else { 1167 // Update VmConfig by adding the new device. 1168 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1169 add_to_config(&mut config.fs, fs_cfg); 1170 Ok(None) 1171 } 1172 } 1173 1174 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1175 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1176 1177 { 1178 // Validate the configuration change in a cloned configuration 1179 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1180 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1181 config.validate().map_err(VmError::ConfigValidation)?; 1182 } 1183 1184 if let Some(ref mut vm) = self.vm { 1185 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1186 error!("Error when adding new pmem device to the VM: {:?}", e); 1187 e 1188 })?; 1189 serde_json::to_vec(&info) 1190 .map(Some) 1191 .map_err(VmError::SerializeJson) 1192 } else { 1193 // Update VmConfig by adding the new device. 1194 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1195 add_to_config(&mut config.pmem, pmem_cfg); 1196 Ok(None) 1197 } 1198 } 1199 1200 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1201 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1202 1203 { 1204 // Validate the configuration change in a cloned configuration 1205 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1206 add_to_config(&mut config.net, net_cfg.clone()); 1207 config.validate().map_err(VmError::ConfigValidation)?; 1208 } 1209 1210 if let Some(ref mut vm) = self.vm { 1211 let info = vm.add_net(net_cfg).map_err(|e| { 1212 error!("Error when adding new network device to the VM: {:?}", e); 1213 e 1214 })?; 1215 serde_json::to_vec(&info) 1216 .map(Some) 1217 .map_err(VmError::SerializeJson) 1218 } else { 1219 // Update VmConfig by adding the new device. 1220 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1221 add_to_config(&mut config.net, net_cfg); 1222 Ok(None) 1223 } 1224 } 1225 1226 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1227 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1228 1229 { 1230 // Validate the configuration change in a cloned configuration 1231 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1232 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1233 config.validate().map_err(VmError::ConfigValidation)?; 1234 } 1235 1236 if let Some(ref mut vm) = self.vm { 1237 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1238 error!("Error when adding new vDPA device to the VM: {:?}", e); 1239 e 1240 })?; 1241 serde_json::to_vec(&info) 1242 .map(Some) 1243 .map_err(VmError::SerializeJson) 1244 } else { 1245 // Update VmConfig by adding the new device. 1246 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1247 add_to_config(&mut config.vdpa, vdpa_cfg); 1248 Ok(None) 1249 } 1250 } 1251 1252 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1253 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1254 1255 { 1256 // Validate the configuration change in a cloned configuration 1257 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1258 1259 if config.vsock.is_some() { 1260 return Err(VmError::TooManyVsockDevices); 1261 } 1262 1263 config.vsock = Some(vsock_cfg.clone()); 1264 config.validate().map_err(VmError::ConfigValidation)?; 1265 } 1266 1267 if let Some(ref mut vm) = self.vm { 1268 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1269 error!("Error when adding new vsock device to the VM: {:?}", e); 1270 e 1271 })?; 1272 serde_json::to_vec(&info) 1273 .map(Some) 1274 .map_err(VmError::SerializeJson) 1275 } else { 1276 // Update VmConfig by adding the new device. 1277 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1278 config.vsock = Some(vsock_cfg); 1279 Ok(None) 1280 } 1281 } 1282 1283 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1284 if let Some(ref mut vm) = self.vm { 1285 let info = vm.counters().map_err(|e| { 1286 error!("Error when getting counters from the VM: {:?}", e); 1287 e 1288 })?; 1289 serde_json::to_vec(&info) 1290 .map(Some) 1291 .map_err(VmError::SerializeJson) 1292 } else { 1293 Err(VmError::VmNotRunning) 1294 } 1295 } 1296 1297 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1298 if let Some(ref mut vm) = self.vm { 1299 vm.power_button() 1300 } else { 1301 Err(VmError::VmNotRunning) 1302 } 1303 } 1304 1305 fn vm_receive_config<T>( 1306 &mut self, 1307 req: &Request, 1308 socket: &mut T, 1309 existing_memory_files: Option<HashMap<u32, File>>, 1310 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 1311 where 1312 T: Read + Write, 1313 { 1314 // Read in config data along with memory manager data 1315 let mut data: Vec<u8> = Vec::new(); 1316 data.resize_with(req.length() as usize, Default::default); 1317 socket 1318 .read_exact(&mut data) 1319 .map_err(MigratableError::MigrateSocket)?; 1320 1321 let vm_migration_config: VmMigrationConfig = 1322 serde_json::from_slice(&data).map_err(|e| { 1323 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 1324 })?; 1325 1326 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1327 self.vm_check_cpuid_compatibility( 1328 &vm_migration_config.vm_config, 1329 &vm_migration_config.common_cpuid, 1330 )?; 1331 1332 let config = vm_migration_config.vm_config.clone(); 1333 self.vm_config = Some(vm_migration_config.vm_config); 1334 1335 let vm = Vm::create_hypervisor_vm( 1336 &self.hypervisor, 1337 #[cfg(feature = "tdx")] 1338 false, 1339 #[cfg(feature = "sev_snp")] 1340 false, 1341 ) 1342 .map_err(|e| { 1343 MigratableError::MigrateReceive(anyhow!( 1344 "Error creating hypervisor VM from snapshot: {:?}", 1345 e 1346 )) 1347 })?; 1348 1349 let phys_bits = 1350 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 1351 1352 let memory_manager = MemoryManager::new( 1353 vm, 1354 &config.lock().unwrap().memory.clone(), 1355 None, 1356 phys_bits, 1357 #[cfg(feature = "tdx")] 1358 false, 1359 Some(&vm_migration_config.memory_manager_data), 1360 existing_memory_files, 1361 #[cfg(target_arch = "x86_64")] 1362 None, 1363 ) 1364 .map_err(|e| { 1365 MigratableError::MigrateReceive(anyhow!( 1366 "Error creating MemoryManager from snapshot: {:?}", 1367 e 1368 )) 1369 })?; 1370 1371 Response::ok().write_to(socket)?; 1372 1373 Ok(memory_manager) 1374 } 1375 1376 fn vm_receive_state<T>( 1377 &mut self, 1378 req: &Request, 1379 socket: &mut T, 1380 mm: Arc<Mutex<MemoryManager>>, 1381 ) -> std::result::Result<(), MigratableError> 1382 where 1383 T: Read + Write, 1384 { 1385 // Read in state data 1386 let mut data: Vec<u8> = Vec::new(); 1387 data.resize_with(req.length() as usize, Default::default); 1388 socket 1389 .read_exact(&mut data) 1390 .map_err(MigratableError::MigrateSocket)?; 1391 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 1392 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 1393 })?; 1394 1395 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 1396 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 1397 })?; 1398 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 1399 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 1400 })?; 1401 #[cfg(feature = "guest_debug")] 1402 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 1403 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 1404 })?; 1405 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 1406 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 1407 })?; 1408 1409 let timestamp = Instant::now(); 1410 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 1411 let mut vm = Vm::new_from_memory_manager( 1412 self.vm_config.clone().unwrap(), 1413 mm, 1414 hypervisor_vm, 1415 exit_evt, 1416 reset_evt, 1417 #[cfg(feature = "guest_debug")] 1418 debug_evt, 1419 &self.seccomp_action, 1420 self.hypervisor.clone(), 1421 activate_evt, 1422 timestamp, 1423 None, 1424 None, 1425 None, 1426 Arc::clone(&self.original_termios_opt), 1427 Some(snapshot), 1428 ) 1429 .map_err(|e| { 1430 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 1431 })?; 1432 1433 // Create VM 1434 vm.restore().map_err(|e| { 1435 Response::error().write_to(socket).ok(); 1436 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 1437 })?; 1438 self.vm = Some(vm); 1439 1440 Response::ok().write_to(socket)?; 1441 1442 Ok(()) 1443 } 1444 1445 fn vm_receive_memory<T>( 1446 &mut self, 1447 req: &Request, 1448 socket: &mut T, 1449 memory_manager: &mut MemoryManager, 1450 ) -> std::result::Result<(), MigratableError> 1451 where 1452 T: Read + Write, 1453 { 1454 // Read table 1455 let table = MemoryRangeTable::read_from(socket, req.length())?; 1456 1457 // And then read the memory itself 1458 memory_manager 1459 .receive_memory_regions(&table, socket) 1460 .map_err(|e| { 1461 Response::error().write_to(socket).ok(); 1462 e 1463 })?; 1464 Response::ok().write_to(socket)?; 1465 Ok(()) 1466 } 1467 1468 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 1469 url.strip_prefix("unix:") 1470 .ok_or_else(|| { 1471 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 1472 }) 1473 .map(|s| s.into()) 1474 } 1475 1476 fn vm_receive_migration( 1477 &mut self, 1478 receive_data_migration: VmReceiveMigrationData, 1479 ) -> result::Result<(), MigratableError> { 1480 info!( 1481 "Receiving migration: receiver_url = {}", 1482 receive_data_migration.receiver_url 1483 ); 1484 1485 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1486 let listener = UnixListener::bind(&path).map_err(|e| { 1487 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1488 })?; 1489 let (mut socket, _addr) = listener.accept().map_err(|e| { 1490 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1491 })?; 1492 std::fs::remove_file(&path).map_err(|e| { 1493 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1494 })?; 1495 1496 let mut started = false; 1497 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1498 let mut existing_memory_files = None; 1499 loop { 1500 let req = Request::read_from(&mut socket)?; 1501 match req.command() { 1502 Command::Invalid => info!("Invalid Command Received"), 1503 Command::Start => { 1504 info!("Start Command Received"); 1505 started = true; 1506 1507 Response::ok().write_to(&mut socket)?; 1508 } 1509 Command::Config => { 1510 info!("Config Command Received"); 1511 1512 if !started { 1513 warn!("Migration not started yet"); 1514 Response::error().write_to(&mut socket)?; 1515 continue; 1516 } 1517 memory_manager = Some(self.vm_receive_config( 1518 &req, 1519 &mut socket, 1520 existing_memory_files.take(), 1521 )?); 1522 } 1523 Command::State => { 1524 info!("State Command Received"); 1525 1526 if !started { 1527 warn!("Migration not started yet"); 1528 Response::error().write_to(&mut socket)?; 1529 continue; 1530 } 1531 if let Some(mm) = memory_manager.take() { 1532 self.vm_receive_state(&req, &mut socket, mm)?; 1533 } else { 1534 warn!("Configuration not sent yet"); 1535 Response::error().write_to(&mut socket)?; 1536 } 1537 } 1538 Command::Memory => { 1539 info!("Memory Command Received"); 1540 1541 if !started { 1542 warn!("Migration not started yet"); 1543 Response::error().write_to(&mut socket)?; 1544 continue; 1545 } 1546 if let Some(mm) = memory_manager.as_ref() { 1547 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 1548 } else { 1549 warn!("Configuration not sent yet"); 1550 Response::error().write_to(&mut socket)?; 1551 } 1552 } 1553 Command::MemoryFd => { 1554 info!("MemoryFd Command Received"); 1555 1556 if !started { 1557 warn!("Migration not started yet"); 1558 Response::error().write_to(&mut socket)?; 1559 continue; 1560 } 1561 1562 let mut buf = [0u8; 4]; 1563 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 1564 MigratableError::MigrateReceive(anyhow!( 1565 "Error receiving slot from socket: {}", 1566 e 1567 )) 1568 })?; 1569 1570 if existing_memory_files.is_none() { 1571 existing_memory_files = Some(HashMap::default()) 1572 } 1573 1574 if let Some(ref mut existing_memory_files) = existing_memory_files { 1575 let slot = u32::from_le_bytes(buf); 1576 existing_memory_files.insert(slot, file.unwrap()); 1577 } 1578 1579 Response::ok().write_to(&mut socket)?; 1580 } 1581 Command::Complete => { 1582 info!("Complete Command Received"); 1583 if let Some(ref mut vm) = self.vm.as_mut() { 1584 vm.resume()?; 1585 Response::ok().write_to(&mut socket)?; 1586 } else { 1587 warn!("VM not created yet"); 1588 Response::error().write_to(&mut socket)?; 1589 } 1590 break; 1591 } 1592 Command::Abandon => { 1593 info!("Abandon Command Received"); 1594 self.vm = None; 1595 self.vm_config = None; 1596 Response::ok().write_to(&mut socket).ok(); 1597 break; 1598 } 1599 } 1600 } 1601 1602 Ok(()) 1603 } 1604 1605 // Returns true if there were dirty pages to send 1606 fn vm_maybe_send_dirty_pages<T>( 1607 vm: &mut Vm, 1608 socket: &mut T, 1609 ) -> result::Result<bool, MigratableError> 1610 where 1611 T: Read + Write, 1612 { 1613 // Send (dirty) memory table 1614 let table = vm.dirty_log()?; 1615 1616 // But if there are no regions go straight to pause 1617 if table.regions().is_empty() { 1618 return Ok(false); 1619 } 1620 1621 Request::memory(table.length()).write_to(socket).unwrap(); 1622 table.write_to(socket)?; 1623 // And then the memory itself 1624 vm.send_memory_regions(&table, socket)?; 1625 let res = Response::read_from(socket)?; 1626 if res.status() != Status::Ok { 1627 warn!("Error during dirty memory migration"); 1628 Request::abandon().write_to(socket)?; 1629 Response::read_from(socket).ok(); 1630 return Err(MigratableError::MigrateSend(anyhow!( 1631 "Error during dirty memory migration" 1632 ))); 1633 } 1634 1635 Ok(true) 1636 } 1637 1638 fn send_migration( 1639 vm: &mut Vm, 1640 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 1641 dyn hypervisor::Hypervisor, 1642 >, 1643 send_data_migration: VmSendMigrationData, 1644 ) -> result::Result<(), MigratableError> { 1645 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 1646 let mut socket = UnixStream::connect(path).map_err(|e| { 1647 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 1648 })?; 1649 1650 // Start the migration 1651 Request::start().write_to(&mut socket)?; 1652 let res = Response::read_from(&mut socket)?; 1653 if res.status() != Status::Ok { 1654 warn!("Error starting migration"); 1655 Request::abandon().write_to(&mut socket)?; 1656 Response::read_from(&mut socket).ok(); 1657 return Err(MigratableError::MigrateSend(anyhow!( 1658 "Error starting migration" 1659 ))); 1660 } 1661 1662 // Send config 1663 let vm_config = vm.get_config(); 1664 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1665 let common_cpuid = { 1666 #[cfg(feature = "tdx")] 1667 let tdx = vm_config.lock().unwrap().is_tdx_enabled(); 1668 let amx = vm_config.lock().unwrap().cpus.features.amx; 1669 let phys_bits = 1670 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 1671 arch::generate_common_cpuid( 1672 &hypervisor, 1673 &arch::CpuidConfig { 1674 sgx_epc_sections: None, 1675 phys_bits, 1676 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 1677 #[cfg(feature = "tdx")] 1678 tdx, 1679 amx, 1680 }, 1681 ) 1682 .map_err(|e| { 1683 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid': {:?}", e)) 1684 })? 1685 }; 1686 1687 if send_data_migration.local { 1688 vm.send_memory_fds(&mut socket)?; 1689 } 1690 1691 let vm_migration_config = VmMigrationConfig { 1692 vm_config, 1693 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1694 common_cpuid, 1695 memory_manager_data: vm.memory_manager_data(), 1696 }; 1697 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1698 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1699 socket 1700 .write_all(&config_data) 1701 .map_err(MigratableError::MigrateSocket)?; 1702 let res = Response::read_from(&mut socket)?; 1703 if res.status() != Status::Ok { 1704 warn!("Error during config migration"); 1705 Request::abandon().write_to(&mut socket)?; 1706 Response::read_from(&mut socket).ok(); 1707 return Err(MigratableError::MigrateSend(anyhow!( 1708 "Error during config migration" 1709 ))); 1710 } 1711 1712 // Let every Migratable object know about the migration being started. 1713 vm.start_migration()?; 1714 1715 if send_data_migration.local { 1716 // Now pause VM 1717 vm.pause()?; 1718 } else { 1719 // Start logging dirty pages 1720 vm.start_dirty_log()?; 1721 1722 // Send memory table 1723 let table = vm.memory_range_table()?; 1724 Request::memory(table.length()) 1725 .write_to(&mut socket) 1726 .unwrap(); 1727 table.write_to(&mut socket)?; 1728 // And then the memory itself 1729 vm.send_memory_regions(&table, &mut socket)?; 1730 let res = Response::read_from(&mut socket)?; 1731 if res.status() != Status::Ok { 1732 warn!("Error during memory migration"); 1733 Request::abandon().write_to(&mut socket)?; 1734 Response::read_from(&mut socket).ok(); 1735 return Err(MigratableError::MigrateSend(anyhow!( 1736 "Error during memory migration" 1737 ))); 1738 } 1739 1740 // Try at most 5 passes of dirty memory sending 1741 const MAX_DIRTY_MIGRATIONS: usize = 5; 1742 for i in 0..MAX_DIRTY_MIGRATIONS { 1743 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1744 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1745 break; 1746 } 1747 } 1748 1749 // Now pause VM 1750 vm.pause()?; 1751 1752 // Send last batch of dirty pages 1753 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1754 1755 // Stop logging dirty pages 1756 vm.stop_dirty_log()?; 1757 } 1758 // Capture snapshot and send it 1759 let vm_snapshot = vm.snapshot()?; 1760 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1761 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1762 socket 1763 .write_all(&snapshot_data) 1764 .map_err(MigratableError::MigrateSocket)?; 1765 let res = Response::read_from(&mut socket)?; 1766 if res.status() != Status::Ok { 1767 warn!("Error during state migration"); 1768 Request::abandon().write_to(&mut socket)?; 1769 Response::read_from(&mut socket).ok(); 1770 return Err(MigratableError::MigrateSend(anyhow!( 1771 "Error during state migration" 1772 ))); 1773 } 1774 1775 // Complete the migration 1776 Request::complete().write_to(&mut socket)?; 1777 let res = Response::read_from(&mut socket)?; 1778 if res.status() != Status::Ok { 1779 warn!("Error completing migration"); 1780 Request::abandon().write_to(&mut socket)?; 1781 Response::read_from(&mut socket).ok(); 1782 return Err(MigratableError::MigrateSend(anyhow!( 1783 "Error completing migration" 1784 ))); 1785 } 1786 info!("Migration complete"); 1787 1788 // Let every Migratable object know about the migration being complete 1789 vm.complete_migration() 1790 } 1791 1792 fn vm_send_migration( 1793 &mut self, 1794 send_data_migration: VmSendMigrationData, 1795 ) -> result::Result<(), MigratableError> { 1796 info!( 1797 "Sending migration: destination_url = {}, local = {}", 1798 send_data_migration.destination_url, send_data_migration.local 1799 ); 1800 1801 if !self 1802 .vm_config 1803 .as_ref() 1804 .unwrap() 1805 .lock() 1806 .unwrap() 1807 .backed_by_shared_memory() 1808 && send_data_migration.local 1809 { 1810 return Err(MigratableError::MigrateSend(anyhow!( 1811 "Local migration requires shared memory or hugepages enabled" 1812 ))); 1813 } 1814 1815 if let Some(vm) = self.vm.as_mut() { 1816 Self::send_migration( 1817 vm, 1818 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1819 self.hypervisor.clone(), 1820 send_data_migration, 1821 ) 1822 .map_err(|migration_err| { 1823 error!("Migration failed: {:?}", migration_err); 1824 1825 // Stop logging dirty pages 1826 if let Err(e) = vm.stop_dirty_log() { 1827 return e; 1828 } 1829 1830 if vm.get_state().unwrap() == VmState::Paused { 1831 if let Err(e) = vm.resume() { 1832 return e; 1833 } 1834 } 1835 1836 migration_err 1837 })?; 1838 1839 // Shutdown the VM after the migration succeeded 1840 self.exit_evt.write(1).map_err(|e| { 1841 MigratableError::MigrateSend(anyhow!( 1842 "Failed shutting down the VM after migration: {:?}", 1843 e 1844 )) 1845 }) 1846 } else { 1847 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 1848 } 1849 } 1850 1851 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1852 fn vm_check_cpuid_compatibility( 1853 &self, 1854 src_vm_config: &Arc<Mutex<VmConfig>>, 1855 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1856 ) -> result::Result<(), MigratableError> { 1857 // We check the `CPUID` compatibility of between the source vm and destination, which is 1858 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1859 let dest_cpuid = &{ 1860 let vm_config = &src_vm_config.lock().unwrap(); 1861 1862 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1863 arch::generate_common_cpuid( 1864 &self.hypervisor.clone(), 1865 &arch::CpuidConfig { 1866 sgx_epc_sections: None, 1867 phys_bits, 1868 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1869 #[cfg(feature = "tdx")] 1870 tdx: vm_config.is_tdx_enabled(), 1871 amx: vm_config.cpus.features.amx, 1872 }, 1873 ) 1874 .map_err(|e| { 1875 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1876 })? 1877 }; 1878 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1879 MigratableError::MigrateReceive(anyhow!( 1880 "Error checking cpu feature compatibility': {:?}", 1881 e 1882 )) 1883 }) 1884 } 1885 1886 fn control_loop( 1887 &mut self, 1888 api_receiver: Rc<Receiver<ApiRequest>>, 1889 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1890 ) -> Result<()> { 1891 const EPOLL_EVENTS_LEN: usize = 100; 1892 1893 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1894 let epoll_fd = self.epoll.as_raw_fd(); 1895 1896 'outer: loop { 1897 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1898 Ok(res) => res, 1899 Err(e) => { 1900 if e.kind() == io::ErrorKind::Interrupted { 1901 // It's well defined from the epoll_wait() syscall 1902 // documentation that the epoll loop can be interrupted 1903 // before any of the requested events occurred or the 1904 // timeout expired. In both those cases, epoll_wait() 1905 // returns an error of type EINTR, but this should not 1906 // be considered as a regular error. Instead it is more 1907 // appropriate to retry, by calling into epoll_wait(). 1908 continue; 1909 } 1910 return Err(Error::Epoll(e)); 1911 } 1912 }; 1913 1914 for event in events.iter().take(num_events) { 1915 let dispatch_event: EpollDispatch = event.data.into(); 1916 match dispatch_event { 1917 EpollDispatch::Unknown => { 1918 let event = event.data; 1919 warn!("Unknown VMM loop event: {}", event); 1920 } 1921 EpollDispatch::Exit => { 1922 info!("VM exit event"); 1923 // Consume the event. 1924 self.exit_evt.read().map_err(Error::EventFdRead)?; 1925 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1926 1927 break 'outer; 1928 } 1929 EpollDispatch::Reset => { 1930 info!("VM reset event"); 1931 // Consume the event. 1932 self.reset_evt.read().map_err(Error::EventFdRead)?; 1933 self.vm_reboot().map_err(Error::VmReboot)?; 1934 } 1935 EpollDispatch::ActivateVirtioDevices => { 1936 if let Some(ref vm) = self.vm { 1937 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1938 info!( 1939 "Trying to activate pending virtio devices: count = {}", 1940 count 1941 ); 1942 vm.activate_virtio_devices() 1943 .map_err(Error::ActivateVirtioDevices)?; 1944 } 1945 } 1946 EpollDispatch::Api => { 1947 // Consume the events. 1948 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1949 // Read from the API receiver channel 1950 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1951 1952 info!("API request event: {:?}", api_request); 1953 match api_request { 1954 ApiRequest::VmCreate(config, sender) => { 1955 let response = self 1956 .vm_create(config) 1957 .map_err(ApiError::VmCreate) 1958 .map(|_| ApiResponsePayload::Empty); 1959 1960 sender.send(response).map_err(Error::ApiResponseSend)?; 1961 } 1962 ApiRequest::VmDelete(sender) => { 1963 let response = self 1964 .vm_delete() 1965 .map_err(ApiError::VmDelete) 1966 .map(|_| ApiResponsePayload::Empty); 1967 1968 sender.send(response).map_err(Error::ApiResponseSend)?; 1969 } 1970 ApiRequest::VmBoot(sender) => { 1971 let response = self 1972 .vm_boot() 1973 .map_err(ApiError::VmBoot) 1974 .map(|_| ApiResponsePayload::Empty); 1975 1976 sender.send(response).map_err(Error::ApiResponseSend)?; 1977 } 1978 ApiRequest::VmShutdown(sender) => { 1979 let response = self 1980 .vm_shutdown() 1981 .map_err(ApiError::VmShutdown) 1982 .map(|_| ApiResponsePayload::Empty); 1983 1984 sender.send(response).map_err(Error::ApiResponseSend)?; 1985 } 1986 ApiRequest::VmReboot(sender) => { 1987 let response = self 1988 .vm_reboot() 1989 .map_err(ApiError::VmReboot) 1990 .map(|_| ApiResponsePayload::Empty); 1991 1992 sender.send(response).map_err(Error::ApiResponseSend)?; 1993 } 1994 ApiRequest::VmInfo(sender) => { 1995 let response = self 1996 .vm_info() 1997 .map_err(ApiError::VmInfo) 1998 .map(ApiResponsePayload::VmInfo); 1999 2000 sender.send(response).map_err(Error::ApiResponseSend)?; 2001 } 2002 ApiRequest::VmmPing(sender) => { 2003 let response = ApiResponsePayload::VmmPing(self.vmm_ping()); 2004 2005 sender.send(Ok(response)).map_err(Error::ApiResponseSend)?; 2006 } 2007 ApiRequest::VmPause(sender) => { 2008 let response = self 2009 .vm_pause() 2010 .map_err(ApiError::VmPause) 2011 .map(|_| ApiResponsePayload::Empty); 2012 2013 sender.send(response).map_err(Error::ApiResponseSend)?; 2014 } 2015 ApiRequest::VmResume(sender) => { 2016 let response = self 2017 .vm_resume() 2018 .map_err(ApiError::VmResume) 2019 .map(|_| ApiResponsePayload::Empty); 2020 2021 sender.send(response).map_err(Error::ApiResponseSend)?; 2022 } 2023 ApiRequest::VmSnapshot(snapshot_data, sender) => { 2024 let response = self 2025 .vm_snapshot(&snapshot_data.destination_url) 2026 .map_err(ApiError::VmSnapshot) 2027 .map(|_| ApiResponsePayload::Empty); 2028 2029 sender.send(response).map_err(Error::ApiResponseSend)?; 2030 } 2031 ApiRequest::VmRestore(restore_data, sender) => { 2032 let response = self 2033 .vm_restore(restore_data.as_ref().clone()) 2034 .map_err(ApiError::VmRestore) 2035 .map(|_| ApiResponsePayload::Empty); 2036 2037 sender.send(response).map_err(Error::ApiResponseSend)?; 2038 } 2039 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 2040 ApiRequest::VmCoredump(coredump_data, sender) => { 2041 let response = self 2042 .vm_coredump(&coredump_data.destination_url) 2043 .map_err(ApiError::VmCoredump) 2044 .map(|_| ApiResponsePayload::Empty); 2045 2046 sender.send(response).map_err(Error::ApiResponseSend)?; 2047 } 2048 ApiRequest::VmmShutdown(sender) => { 2049 let response = self 2050 .vmm_shutdown() 2051 .map_err(ApiError::VmmShutdown) 2052 .map(|_| ApiResponsePayload::Empty); 2053 2054 sender.send(response).map_err(Error::ApiResponseSend)?; 2055 2056 break 'outer; 2057 } 2058 ApiRequest::VmResize(resize_data, sender) => { 2059 let response = self 2060 .vm_resize( 2061 resize_data.desired_vcpus, 2062 resize_data.desired_ram, 2063 resize_data.desired_balloon, 2064 ) 2065 .map_err(ApiError::VmResize) 2066 .map(|_| ApiResponsePayload::Empty); 2067 sender.send(response).map_err(Error::ApiResponseSend)?; 2068 } 2069 ApiRequest::VmResizeZone(resize_zone_data, sender) => { 2070 let response = self 2071 .vm_resize_zone( 2072 resize_zone_data.id.clone(), 2073 resize_zone_data.desired_ram, 2074 ) 2075 .map_err(ApiError::VmResizeZone) 2076 .map(|_| ApiResponsePayload::Empty); 2077 sender.send(response).map_err(Error::ApiResponseSend)?; 2078 } 2079 ApiRequest::VmAddDevice(add_device_data, sender) => { 2080 let response = self 2081 .vm_add_device(add_device_data.as_ref().clone()) 2082 .map_err(ApiError::VmAddDevice) 2083 .map(ApiResponsePayload::VmAction); 2084 sender.send(response).map_err(Error::ApiResponseSend)?; 2085 } 2086 ApiRequest::VmAddUserDevice(add_device_data, sender) => { 2087 let response = self 2088 .vm_add_user_device(add_device_data.as_ref().clone()) 2089 .map_err(ApiError::VmAddUserDevice) 2090 .map(ApiResponsePayload::VmAction); 2091 sender.send(response).map_err(Error::ApiResponseSend)?; 2092 } 2093 ApiRequest::VmRemoveDevice(remove_device_data, sender) => { 2094 let response = self 2095 .vm_remove_device(remove_device_data.id.clone()) 2096 .map_err(ApiError::VmRemoveDevice) 2097 .map(|_| ApiResponsePayload::Empty); 2098 sender.send(response).map_err(Error::ApiResponseSend)?; 2099 } 2100 ApiRequest::VmAddDisk(add_disk_data, sender) => { 2101 let response = self 2102 .vm_add_disk(add_disk_data.as_ref().clone()) 2103 .map_err(ApiError::VmAddDisk) 2104 .map(ApiResponsePayload::VmAction); 2105 sender.send(response).map_err(Error::ApiResponseSend)?; 2106 } 2107 ApiRequest::VmAddFs(add_fs_data, sender) => { 2108 let response = self 2109 .vm_add_fs(add_fs_data.as_ref().clone()) 2110 .map_err(ApiError::VmAddFs) 2111 .map(ApiResponsePayload::VmAction); 2112 sender.send(response).map_err(Error::ApiResponseSend)?; 2113 } 2114 ApiRequest::VmAddPmem(add_pmem_data, sender) => { 2115 let response = self 2116 .vm_add_pmem(add_pmem_data.as_ref().clone()) 2117 .map_err(ApiError::VmAddPmem) 2118 .map(ApiResponsePayload::VmAction); 2119 sender.send(response).map_err(Error::ApiResponseSend)?; 2120 } 2121 ApiRequest::VmAddNet(add_net_data, sender) => { 2122 let response = self 2123 .vm_add_net(add_net_data.as_ref().clone()) 2124 .map_err(ApiError::VmAddNet) 2125 .map(ApiResponsePayload::VmAction); 2126 sender.send(response).map_err(Error::ApiResponseSend)?; 2127 } 2128 ApiRequest::VmAddVdpa(add_vdpa_data, sender) => { 2129 let response = self 2130 .vm_add_vdpa(add_vdpa_data.as_ref().clone()) 2131 .map_err(ApiError::VmAddVdpa) 2132 .map(ApiResponsePayload::VmAction); 2133 sender.send(response).map_err(Error::ApiResponseSend)?; 2134 } 2135 ApiRequest::VmAddVsock(add_vsock_data, sender) => { 2136 let response = self 2137 .vm_add_vsock(add_vsock_data.as_ref().clone()) 2138 .map_err(ApiError::VmAddVsock) 2139 .map(ApiResponsePayload::VmAction); 2140 sender.send(response).map_err(Error::ApiResponseSend)?; 2141 } 2142 ApiRequest::VmCounters(sender) => { 2143 let response = self 2144 .vm_counters() 2145 .map_err(ApiError::VmInfo) 2146 .map(ApiResponsePayload::VmAction); 2147 sender.send(response).map_err(Error::ApiResponseSend)?; 2148 } 2149 ApiRequest::VmReceiveMigration(receive_migration_data, sender) => { 2150 let response = self 2151 .vm_receive_migration( 2152 receive_migration_data.as_ref().clone(), 2153 ) 2154 .map_err(ApiError::VmReceiveMigration) 2155 .map(|_| ApiResponsePayload::Empty); 2156 sender.send(response).map_err(Error::ApiResponseSend)?; 2157 } 2158 ApiRequest::VmSendMigration(send_migration_data, sender) => { 2159 let response = self 2160 .vm_send_migration(send_migration_data.as_ref().clone()) 2161 .map_err(ApiError::VmSendMigration) 2162 .map(|_| ApiResponsePayload::Empty); 2163 sender.send(response).map_err(Error::ApiResponseSend)?; 2164 } 2165 ApiRequest::VmPowerButton(sender) => { 2166 let response = self 2167 .vm_power_button() 2168 .map_err(ApiError::VmPowerButton) 2169 .map(|_| ApiResponsePayload::Empty); 2170 2171 sender.send(response).map_err(Error::ApiResponseSend)?; 2172 } 2173 } 2174 } 2175 } 2176 #[cfg(feature = "guest_debug")] 2177 EpollDispatch::Debug => { 2178 // Consume the events. 2179 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 2180 // Read from the API receiver channel 2181 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 2182 2183 let response = if let Some(ref mut vm) = self.vm { 2184 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 2185 } else { 2186 Err(VmError::VmNotRunning) 2187 } 2188 .map_err(gdb::Error::Vm); 2189 2190 gdb_request 2191 .sender 2192 .send(response) 2193 .map_err(Error::GdbResponseSend)?; 2194 } 2195 } 2196 #[cfg(not(feature = "guest_debug"))] 2197 EpollDispatch::Debug => {} 2198 } 2199 } 2200 } 2201 2202 // Trigger the termination of the signal_handler thread 2203 if let Some(signals) = self.signals.take() { 2204 signals.close(); 2205 } 2206 2207 // Wait for all the threads to finish 2208 for thread in self.threads.drain(..) { 2209 thread.join().map_err(Error::ThreadCleanup)? 2210 } 2211 2212 Ok(()) 2213 } 2214 } 2215 2216 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2217 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2218 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2219 2220 #[cfg(test)] 2221 mod unit_tests { 2222 use super::*; 2223 use config::{ 2224 ConsoleConfig, ConsoleOutputMode, CpusConfig, HotplugMethod, MemoryConfig, PayloadConfig, 2225 RngConfig, VmConfig, 2226 }; 2227 2228 fn create_dummy_vmm() -> Vmm { 2229 Vmm::new( 2230 VmmVersionInfo::new("dummy", "dummy"), 2231 EventFd::new(EFD_NONBLOCK).unwrap(), 2232 #[cfg(feature = "guest_debug")] 2233 EventFd::new(EFD_NONBLOCK).unwrap(), 2234 #[cfg(feature = "guest_debug")] 2235 EventFd::new(EFD_NONBLOCK).unwrap(), 2236 SeccompAction::Allow, 2237 hypervisor::new().unwrap(), 2238 EventFd::new(EFD_NONBLOCK).unwrap(), 2239 ) 2240 .unwrap() 2241 } 2242 2243 fn create_dummy_vm_config() -> Arc<Mutex<VmConfig>> { 2244 Arc::new(Mutex::new(VmConfig { 2245 cpus: CpusConfig { 2246 boot_vcpus: 1, 2247 max_vcpus: 1, 2248 topology: None, 2249 kvm_hyperv: false, 2250 max_phys_bits: 46, 2251 affinity: None, 2252 features: config::CpuFeatures::default(), 2253 }, 2254 memory: MemoryConfig { 2255 size: 536_870_912, 2256 mergeable: false, 2257 hotplug_method: HotplugMethod::Acpi, 2258 hotplug_size: None, 2259 hotplugged_size: None, 2260 shared: true, 2261 hugepages: false, 2262 hugepage_size: None, 2263 prefault: false, 2264 zones: None, 2265 thp: true, 2266 }, 2267 payload: Some(PayloadConfig { 2268 kernel: Some(PathBuf::from("/path/to/kernel")), 2269 ..Default::default() 2270 }), 2271 disks: None, 2272 net: None, 2273 rng: RngConfig { 2274 src: PathBuf::from("/dev/urandom"), 2275 iommu: false, 2276 }, 2277 balloon: None, 2278 fs: None, 2279 pmem: None, 2280 serial: ConsoleConfig { 2281 file: None, 2282 mode: ConsoleOutputMode::Null, 2283 iommu: false, 2284 socket: None, 2285 }, 2286 console: ConsoleConfig { 2287 file: None, 2288 mode: ConsoleOutputMode::Tty, 2289 iommu: false, 2290 socket: None, 2291 }, 2292 devices: None, 2293 user_devices: None, 2294 vdpa: None, 2295 vsock: None, 2296 pvpanic: false, 2297 iommu: false, 2298 #[cfg(target_arch = "x86_64")] 2299 sgx_epc: None, 2300 numa: None, 2301 watchdog: false, 2302 #[cfg(feature = "guest_debug")] 2303 gdb: false, 2304 platform: None, 2305 tpm: None, 2306 preserved_fds: None, 2307 })) 2308 } 2309 2310 #[test] 2311 fn test_vmm_vm_create() { 2312 let mut vmm = create_dummy_vmm(); 2313 let config = create_dummy_vm_config(); 2314 2315 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2316 assert!(matches!( 2317 vmm.vm_create(config), 2318 Err(VmError::VmAlreadyCreated) 2319 )); 2320 } 2321 2322 #[test] 2323 fn test_vmm_vm_cold_add_device() { 2324 let mut vmm = create_dummy_vmm(); 2325 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2326 2327 assert!(matches!( 2328 vmm.vm_add_device(device_config.clone()), 2329 Err(VmError::VmNotCreated) 2330 )); 2331 2332 let _ = vmm.vm_create(create_dummy_vm_config()); 2333 assert!(vmm 2334 .vm_config 2335 .as_ref() 2336 .unwrap() 2337 .lock() 2338 .unwrap() 2339 .devices 2340 .is_none()); 2341 2342 let result = vmm.vm_add_device(device_config.clone()); 2343 assert!(result.is_ok()); 2344 assert!(result.unwrap().is_none()); 2345 assert_eq!( 2346 vmm.vm_config 2347 .as_ref() 2348 .unwrap() 2349 .lock() 2350 .unwrap() 2351 .devices 2352 .clone() 2353 .unwrap() 2354 .len(), 2355 1 2356 ); 2357 assert_eq!( 2358 vmm.vm_config 2359 .as_ref() 2360 .unwrap() 2361 .lock() 2362 .unwrap() 2363 .devices 2364 .clone() 2365 .unwrap()[0], 2366 device_config 2367 ); 2368 } 2369 2370 #[test] 2371 fn test_vmm_vm_cold_add_user_device() { 2372 let mut vmm = create_dummy_vmm(); 2373 let user_device_config = 2374 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2375 2376 assert!(matches!( 2377 vmm.vm_add_user_device(user_device_config.clone()), 2378 Err(VmError::VmNotCreated) 2379 )); 2380 2381 let _ = vmm.vm_create(create_dummy_vm_config()); 2382 assert!(vmm 2383 .vm_config 2384 .as_ref() 2385 .unwrap() 2386 .lock() 2387 .unwrap() 2388 .user_devices 2389 .is_none()); 2390 2391 let result = vmm.vm_add_user_device(user_device_config.clone()); 2392 assert!(result.is_ok()); 2393 assert!(result.unwrap().is_none()); 2394 assert_eq!( 2395 vmm.vm_config 2396 .as_ref() 2397 .unwrap() 2398 .lock() 2399 .unwrap() 2400 .user_devices 2401 .clone() 2402 .unwrap() 2403 .len(), 2404 1 2405 ); 2406 assert_eq!( 2407 vmm.vm_config 2408 .as_ref() 2409 .unwrap() 2410 .lock() 2411 .unwrap() 2412 .user_devices 2413 .clone() 2414 .unwrap()[0], 2415 user_device_config 2416 ); 2417 } 2418 2419 #[test] 2420 fn test_vmm_vm_cold_add_disk() { 2421 let mut vmm = create_dummy_vmm(); 2422 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2423 2424 assert!(matches!( 2425 vmm.vm_add_disk(disk_config.clone()), 2426 Err(VmError::VmNotCreated) 2427 )); 2428 2429 let _ = vmm.vm_create(create_dummy_vm_config()); 2430 assert!(vmm 2431 .vm_config 2432 .as_ref() 2433 .unwrap() 2434 .lock() 2435 .unwrap() 2436 .disks 2437 .is_none()); 2438 2439 let result = vmm.vm_add_disk(disk_config.clone()); 2440 assert!(result.is_ok()); 2441 assert!(result.unwrap().is_none()); 2442 assert_eq!( 2443 vmm.vm_config 2444 .as_ref() 2445 .unwrap() 2446 .lock() 2447 .unwrap() 2448 .disks 2449 .clone() 2450 .unwrap() 2451 .len(), 2452 1 2453 ); 2454 assert_eq!( 2455 vmm.vm_config 2456 .as_ref() 2457 .unwrap() 2458 .lock() 2459 .unwrap() 2460 .disks 2461 .clone() 2462 .unwrap()[0], 2463 disk_config 2464 ); 2465 } 2466 2467 #[test] 2468 fn test_vmm_vm_cold_add_fs() { 2469 let mut vmm = create_dummy_vmm(); 2470 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2471 2472 assert!(matches!( 2473 vmm.vm_add_fs(fs_config.clone()), 2474 Err(VmError::VmNotCreated) 2475 )); 2476 2477 let _ = vmm.vm_create(create_dummy_vm_config()); 2478 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2479 2480 let result = vmm.vm_add_fs(fs_config.clone()); 2481 assert!(result.is_ok()); 2482 assert!(result.unwrap().is_none()); 2483 assert_eq!( 2484 vmm.vm_config 2485 .as_ref() 2486 .unwrap() 2487 .lock() 2488 .unwrap() 2489 .fs 2490 .clone() 2491 .unwrap() 2492 .len(), 2493 1 2494 ); 2495 assert_eq!( 2496 vmm.vm_config 2497 .as_ref() 2498 .unwrap() 2499 .lock() 2500 .unwrap() 2501 .fs 2502 .clone() 2503 .unwrap()[0], 2504 fs_config 2505 ); 2506 } 2507 2508 #[test] 2509 fn test_vmm_vm_cold_add_pmem() { 2510 let mut vmm = create_dummy_vmm(); 2511 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2512 2513 assert!(matches!( 2514 vmm.vm_add_pmem(pmem_config.clone()), 2515 Err(VmError::VmNotCreated) 2516 )); 2517 2518 let _ = vmm.vm_create(create_dummy_vm_config()); 2519 assert!(vmm 2520 .vm_config 2521 .as_ref() 2522 .unwrap() 2523 .lock() 2524 .unwrap() 2525 .pmem 2526 .is_none()); 2527 2528 let result = vmm.vm_add_pmem(pmem_config.clone()); 2529 assert!(result.is_ok()); 2530 assert!(result.unwrap().is_none()); 2531 assert_eq!( 2532 vmm.vm_config 2533 .as_ref() 2534 .unwrap() 2535 .lock() 2536 .unwrap() 2537 .pmem 2538 .clone() 2539 .unwrap() 2540 .len(), 2541 1 2542 ); 2543 assert_eq!( 2544 vmm.vm_config 2545 .as_ref() 2546 .unwrap() 2547 .lock() 2548 .unwrap() 2549 .pmem 2550 .clone() 2551 .unwrap()[0], 2552 pmem_config 2553 ); 2554 } 2555 2556 #[test] 2557 fn test_vmm_vm_cold_add_net() { 2558 let mut vmm = create_dummy_vmm(); 2559 let net_config = NetConfig::parse( 2560 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2561 ) 2562 .unwrap(); 2563 2564 assert!(matches!( 2565 vmm.vm_add_net(net_config.clone()), 2566 Err(VmError::VmNotCreated) 2567 )); 2568 2569 let _ = vmm.vm_create(create_dummy_vm_config()); 2570 assert!(vmm 2571 .vm_config 2572 .as_ref() 2573 .unwrap() 2574 .lock() 2575 .unwrap() 2576 .net 2577 .is_none()); 2578 2579 let result = vmm.vm_add_net(net_config.clone()); 2580 assert!(result.is_ok()); 2581 assert!(result.unwrap().is_none()); 2582 assert_eq!( 2583 vmm.vm_config 2584 .as_ref() 2585 .unwrap() 2586 .lock() 2587 .unwrap() 2588 .net 2589 .clone() 2590 .unwrap() 2591 .len(), 2592 1 2593 ); 2594 assert_eq!( 2595 vmm.vm_config 2596 .as_ref() 2597 .unwrap() 2598 .lock() 2599 .unwrap() 2600 .net 2601 .clone() 2602 .unwrap()[0], 2603 net_config 2604 ); 2605 } 2606 2607 #[test] 2608 fn test_vmm_vm_cold_add_vdpa() { 2609 let mut vmm = create_dummy_vmm(); 2610 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2611 2612 assert!(matches!( 2613 vmm.vm_add_vdpa(vdpa_config.clone()), 2614 Err(VmError::VmNotCreated) 2615 )); 2616 2617 let _ = vmm.vm_create(create_dummy_vm_config()); 2618 assert!(vmm 2619 .vm_config 2620 .as_ref() 2621 .unwrap() 2622 .lock() 2623 .unwrap() 2624 .vdpa 2625 .is_none()); 2626 2627 let result = vmm.vm_add_vdpa(vdpa_config.clone()); 2628 assert!(result.is_ok()); 2629 assert!(result.unwrap().is_none()); 2630 assert_eq!( 2631 vmm.vm_config 2632 .as_ref() 2633 .unwrap() 2634 .lock() 2635 .unwrap() 2636 .vdpa 2637 .clone() 2638 .unwrap() 2639 .len(), 2640 1 2641 ); 2642 assert_eq!( 2643 vmm.vm_config 2644 .as_ref() 2645 .unwrap() 2646 .lock() 2647 .unwrap() 2648 .vdpa 2649 .clone() 2650 .unwrap()[0], 2651 vdpa_config 2652 ); 2653 } 2654 2655 #[test] 2656 fn test_vmm_vm_cold_add_vsock() { 2657 let mut vmm = create_dummy_vmm(); 2658 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=1,iommu=on").unwrap(); 2659 2660 assert!(matches!( 2661 vmm.vm_add_vsock(vsock_config.clone()), 2662 Err(VmError::VmNotCreated) 2663 )); 2664 2665 let _ = vmm.vm_create(create_dummy_vm_config()); 2666 assert!(vmm 2667 .vm_config 2668 .as_ref() 2669 .unwrap() 2670 .lock() 2671 .unwrap() 2672 .vsock 2673 .is_none()); 2674 2675 let result = vmm.vm_add_vsock(vsock_config.clone()); 2676 assert!(result.is_ok()); 2677 assert!(result.unwrap().is_none()); 2678 assert_eq!( 2679 vmm.vm_config 2680 .as_ref() 2681 .unwrap() 2682 .lock() 2683 .unwrap() 2684 .vsock 2685 .clone() 2686 .unwrap(), 2687 vsock_config 2688 ); 2689 } 2690 } 2691