1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use std::collections::HashMap; 12 use std::fs::File; 13 use std::io::{stdout, Read, Write}; 14 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 15 use std::os::unix::net::{UnixListener, UnixStream}; 16 use std::panic::AssertUnwindSafe; 17 use std::path::PathBuf; 18 use std::rc::Rc; 19 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 use std::{io, result, thread}; 23 24 use anyhow::anyhow; 25 #[cfg(feature = "dbus_api")] 26 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 27 use api::http::HttpApiHandle; 28 use console_devices::{pre_create_console_devices, ConsoleInfo}; 29 use landlock::LandlockError; 30 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 31 use memory_manager::MemoryManagerSnapshotData; 32 use pci::PciBdf; 33 use seccompiler::{apply_filter, SeccompAction}; 34 use serde::ser::{SerializeStruct, Serializer}; 35 use serde::{Deserialize, Serialize}; 36 use signal_hook::iterator::{Handle, Signals}; 37 use thiserror::Error; 38 use tracer::trace_scoped; 39 use vm_memory::bitmap::AtomicBitmap; 40 use vm_memory::{ReadVolatile, WriteVolatile}; 41 use vm_migration::protocol::*; 42 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 43 use vmm_sys_util::eventfd::EventFd; 44 use vmm_sys_util::signal::unblock_signal; 45 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 46 47 use crate::api::{ 48 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 49 VmSendMigrationData, VmmPingResponse, 50 }; 51 use crate::config::{add_to_config, RestoreConfig}; 52 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 53 use crate::coredump::GuestDebuggable; 54 use crate::landlock::Landlock; 55 use crate::memory_manager::MemoryManager; 56 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 57 use crate::migration::get_vm_snapshot; 58 use crate::migration::{recv_vm_config, recv_vm_state}; 59 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 60 use crate::vm::{Error as VmError, Vm, VmState}; 61 use crate::vm_config::{ 62 DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, 63 VmConfig, VsockConfig, 64 }; 65 66 mod acpi; 67 pub mod api; 68 mod clone3; 69 pub mod config; 70 pub mod console_devices; 71 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 72 mod coredump; 73 pub mod cpu; 74 pub mod device_manager; 75 pub mod device_tree; 76 #[cfg(feature = "guest_debug")] 77 mod gdb; 78 #[cfg(feature = "igvm")] 79 mod igvm; 80 pub mod interrupt; 81 pub mod landlock; 82 pub mod memory_manager; 83 pub mod migration; 84 mod pci_segment; 85 pub mod seccomp_filters; 86 mod serial_manager; 87 mod sigwinch_listener; 88 pub mod vm; 89 pub mod vm_config; 90 91 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 92 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 93 94 /// Errors associated with VMM management 95 #[derive(Debug, Error)] 96 pub enum Error { 97 /// API request receive error 98 #[error("Error receiving API request: {0}")] 99 ApiRequestRecv(#[source] RecvError), 100 101 /// API response send error 102 #[error("Error sending API request: {0}")] 103 ApiResponseSend(#[source] SendError<ApiResponse>), 104 105 /// Cannot bind to the UNIX domain socket path 106 #[error("Error binding to UNIX domain socket: {0}")] 107 Bind(#[source] io::Error), 108 109 /// Cannot clone EventFd. 110 #[error("Error cloning EventFd: {0}")] 111 EventFdClone(#[source] io::Error), 112 113 /// Cannot create EventFd. 114 #[error("Error creating EventFd: {0}")] 115 EventFdCreate(#[source] io::Error), 116 117 /// Cannot read from EventFd. 118 #[error("Error reading from EventFd: {0}")] 119 EventFdRead(#[source] io::Error), 120 121 /// Cannot create epoll context. 122 #[error("Error creating epoll context: {0}")] 123 Epoll(#[source] io::Error), 124 125 /// Cannot create HTTP thread 126 #[error("Error spawning HTTP thread: {0}")] 127 HttpThreadSpawn(#[source] io::Error), 128 129 /// Cannot create D-Bus thread 130 #[cfg(feature = "dbus_api")] 131 #[error("Error spawning D-Bus thread: {0}")] 132 DBusThreadSpawn(#[source] io::Error), 133 134 /// Cannot start D-Bus session 135 #[cfg(feature = "dbus_api")] 136 #[error("Error starting D-Bus session: {0}")] 137 CreateDBusSession(#[source] zbus::Error), 138 139 /// Cannot create `event-monitor` thread 140 #[error("Error spawning `event-monitor` thread: {0}")] 141 EventMonitorThreadSpawn(#[source] io::Error), 142 143 /// Cannot handle the VM STDIN stream 144 #[error("Error handling VM stdin: {0:?}")] 145 Stdin(VmError), 146 147 /// Cannot handle the VM pty stream 148 #[error("Error handling VM pty: {0:?}")] 149 Pty(VmError), 150 151 /// Cannot reboot the VM 152 #[error("Error rebooting VM: {0:?}")] 153 VmReboot(VmError), 154 155 /// Cannot create VMM thread 156 #[error("Error spawning VMM thread {0:?}")] 157 VmmThreadSpawn(#[source] io::Error), 158 159 /// Cannot shut the VMM down 160 #[error("Error shutting down VMM: {0:?}")] 161 VmmShutdown(VmError), 162 163 /// Cannot create seccomp filter 164 #[error("Error creating seccomp filter: {0}")] 165 CreateSeccompFilter(seccompiler::Error), 166 167 /// Cannot apply seccomp filter 168 #[error("Error applying seccomp filter: {0}")] 169 ApplySeccompFilter(seccompiler::Error), 170 171 /// Error activating virtio devices 172 #[error("Error activating virtio devices: {0:?}")] 173 ActivateVirtioDevices(VmError), 174 175 /// Error creating API server 176 #[error("Error creating API server {0:?}")] 177 CreateApiServer(micro_http::ServerError), 178 179 /// Error binding API server socket 180 #[error("Error creation API server's socket {0:?}")] 181 CreateApiServerSocket(#[source] io::Error), 182 183 #[cfg(feature = "guest_debug")] 184 #[error("Failed to start the GDB thread: {0}")] 185 GdbThreadSpawn(io::Error), 186 187 /// GDB request receive error 188 #[cfg(feature = "guest_debug")] 189 #[error("Error receiving GDB request: {0}")] 190 GdbRequestRecv(#[source] RecvError), 191 192 /// GDB response send error 193 #[cfg(feature = "guest_debug")] 194 #[error("Error sending GDB request: {0}")] 195 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 196 197 #[error("Cannot spawn a signal handler thread: {0}")] 198 SignalHandlerSpawn(#[source] io::Error), 199 200 #[error("Failed to join on threads: {0:?}")] 201 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 202 203 /// Cannot create Landlock object 204 #[error("Error creating landlock object: {0}")] 205 CreateLandlock(LandlockError), 206 207 /// Cannot apply landlock based sandboxing 208 #[error("Error applying landlock: {0}")] 209 ApplyLandlock(LandlockError), 210 } 211 pub type Result<T> = result::Result<T, Error>; 212 213 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 214 #[repr(u64)] 215 pub enum EpollDispatch { 216 Exit = 0, 217 Reset = 1, 218 Api = 2, 219 ActivateVirtioDevices = 3, 220 Debug = 4, 221 Unknown, 222 } 223 224 impl From<u64> for EpollDispatch { 225 fn from(v: u64) -> Self { 226 use EpollDispatch::*; 227 match v { 228 0 => Exit, 229 1 => Reset, 230 2 => Api, 231 3 => ActivateVirtioDevices, 232 4 => Debug, 233 _ => Unknown, 234 } 235 } 236 } 237 238 pub struct EpollContext { 239 epoll_file: File, 240 } 241 242 impl EpollContext { 243 pub fn new() -> result::Result<EpollContext, io::Error> { 244 let epoll_fd = epoll::create(true)?; 245 // Use 'File' to enforce closing on 'epoll_fd' 246 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 247 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 248 249 Ok(EpollContext { epoll_file }) 250 } 251 252 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 253 where 254 T: AsRawFd, 255 { 256 let dispatch_index = token as u64; 257 epoll::ctl( 258 self.epoll_file.as_raw_fd(), 259 epoll::ControlOptions::EPOLL_CTL_ADD, 260 fd.as_raw_fd(), 261 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 262 )?; 263 264 Ok(()) 265 } 266 267 #[cfg(fuzzing)] 268 pub fn add_event_custom<T>( 269 &mut self, 270 fd: &T, 271 id: u64, 272 evts: epoll::Events, 273 ) -> result::Result<(), io::Error> 274 where 275 T: AsRawFd, 276 { 277 epoll::ctl( 278 self.epoll_file.as_raw_fd(), 279 epoll::ControlOptions::EPOLL_CTL_ADD, 280 fd.as_raw_fd(), 281 epoll::Event::new(evts, id), 282 )?; 283 284 Ok(()) 285 } 286 } 287 288 impl AsRawFd for EpollContext { 289 fn as_raw_fd(&self) -> RawFd { 290 self.epoll_file.as_raw_fd() 291 } 292 } 293 294 pub struct PciDeviceInfo { 295 pub id: String, 296 pub bdf: PciBdf, 297 } 298 299 impl Serialize for PciDeviceInfo { 300 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 301 where 302 S: Serializer, 303 { 304 let bdf_str = self.bdf.to_string(); 305 306 // Serialize the structure. 307 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 308 state.serialize_field("id", &self.id)?; 309 state.serialize_field("bdf", &bdf_str)?; 310 state.end() 311 } 312 } 313 314 pub fn feature_list() -> Vec<String> { 315 vec![ 316 #[cfg(feature = "dbus_api")] 317 "dbus_api".to_string(), 318 #[cfg(feature = "dhat-heap")] 319 "dhat-heap".to_string(), 320 #[cfg(feature = "guest_debug")] 321 "guest_debug".to_string(), 322 #[cfg(feature = "igvm")] 323 "igvm".to_string(), 324 #[cfg(feature = "io_uring")] 325 "io_uring".to_string(), 326 #[cfg(feature = "kvm")] 327 "kvm".to_string(), 328 #[cfg(feature = "mshv")] 329 "mshv".to_string(), 330 #[cfg(feature = "sev_snp")] 331 "sev_snp".to_string(), 332 #[cfg(feature = "tdx")] 333 "tdx".to_string(), 334 #[cfg(feature = "tracing")] 335 "tracing".to_string(), 336 ] 337 } 338 339 pub fn start_event_monitor_thread( 340 mut monitor: event_monitor::Monitor, 341 seccomp_action: &SeccompAction, 342 landlock_enable: bool, 343 hypervisor_type: hypervisor::HypervisorType, 344 exit_event: EventFd, 345 ) -> Result<thread::JoinHandle<Result<()>>> { 346 // Retrieve seccomp filter 347 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 348 .map_err(Error::CreateSeccompFilter)?; 349 350 thread::Builder::new() 351 .name("event-monitor".to_owned()) 352 .spawn(move || { 353 // Apply seccomp filter 354 if !seccomp_filter.is_empty() { 355 apply_filter(&seccomp_filter) 356 .map_err(Error::ApplySeccompFilter) 357 .map_err(|e| { 358 error!("Error applying seccomp filter: {:?}", e); 359 exit_event.write(1).ok(); 360 e 361 })?; 362 } 363 if landlock_enable { 364 Landlock::new() 365 .map_err(Error::CreateLandlock)? 366 .restrict_self() 367 .map_err(Error::ApplyLandlock) 368 .map_err(|e| { 369 error!("Error applying landlock to event monitor thread: {:?}", e); 370 exit_event.write(1).ok(); 371 e 372 })?; 373 } 374 375 std::panic::catch_unwind(AssertUnwindSafe(move || { 376 while let Ok(event) = monitor.rx.recv() { 377 let event = Arc::new(event); 378 379 if let Some(ref mut file) = monitor.file { 380 file.write_all(event.as_bytes().as_ref()).ok(); 381 file.write_all(b"\n\n").ok(); 382 } 383 384 for tx in monitor.broadcast.iter() { 385 tx.send(event.clone()).ok(); 386 } 387 } 388 })) 389 .map_err(|_| { 390 error!("`event-monitor` thread panicked"); 391 exit_event.write(1).ok(); 392 }) 393 .ok(); 394 395 Ok(()) 396 }) 397 .map_err(Error::EventMonitorThreadSpawn) 398 } 399 400 #[allow(unused_variables)] 401 #[allow(clippy::too_many_arguments)] 402 pub fn start_vmm_thread( 403 vmm_version: VmmVersionInfo, 404 http_path: &Option<String>, 405 http_fd: Option<RawFd>, 406 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 407 api_event: EventFd, 408 api_sender: Sender<ApiRequest>, 409 api_receiver: Receiver<ApiRequest>, 410 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 411 #[cfg(feature = "guest_debug")] debug_event: EventFd, 412 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 413 exit_event: EventFd, 414 seccomp_action: &SeccompAction, 415 hypervisor: Arc<dyn hypervisor::Hypervisor>, 416 landlock_enable: bool, 417 ) -> Result<VmmThreadHandle> { 418 #[cfg(feature = "guest_debug")] 419 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 420 #[cfg(feature = "guest_debug")] 421 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 422 #[cfg(feature = "guest_debug")] 423 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 424 #[cfg(feature = "guest_debug")] 425 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 426 427 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 428 let hypervisor_type = hypervisor.hypervisor_type(); 429 430 // Retrieve seccomp filter 431 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 432 .map_err(Error::CreateSeccompFilter)?; 433 434 let vmm_seccomp_action = seccomp_action.clone(); 435 let thread = { 436 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 437 thread::Builder::new() 438 .name("vmm".to_string()) 439 .spawn(move || { 440 // Apply seccomp filter for VMM thread. 441 if !vmm_seccomp_filter.is_empty() { 442 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 443 } 444 445 let mut vmm = Vmm::new( 446 vmm_version, 447 api_event, 448 #[cfg(feature = "guest_debug")] 449 debug_event, 450 #[cfg(feature = "guest_debug")] 451 vm_debug_event, 452 vmm_seccomp_action, 453 hypervisor, 454 exit_event, 455 )?; 456 457 vmm.setup_signal_handler(landlock_enable)?; 458 459 vmm.control_loop( 460 Rc::new(api_receiver), 461 #[cfg(feature = "guest_debug")] 462 Rc::new(gdb_receiver), 463 ) 464 }) 465 .map_err(Error::VmmThreadSpawn)? 466 }; 467 468 // The VMM thread is started, we can start the dbus thread 469 // and start serving HTTP requests 470 #[cfg(feature = "dbus_api")] 471 let dbus_shutdown_chs = match dbus_options { 472 Some(opts) => { 473 let (_, chs) = api::start_dbus_thread( 474 opts, 475 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 476 api_sender.clone(), 477 seccomp_action, 478 exit_event.try_clone().map_err(Error::EventFdClone)?, 479 hypervisor_type, 480 )?; 481 Some(chs) 482 } 483 None => None, 484 }; 485 486 let http_api_handle = if let Some(http_path) = http_path { 487 Some(api::start_http_path_thread( 488 http_path, 489 api_event_clone, 490 api_sender, 491 seccomp_action, 492 exit_event, 493 hypervisor_type, 494 landlock_enable, 495 )?) 496 } else if let Some(http_fd) = http_fd { 497 Some(api::start_http_fd_thread( 498 http_fd, 499 api_event_clone, 500 api_sender, 501 seccomp_action, 502 exit_event, 503 hypervisor_type, 504 landlock_enable, 505 )?) 506 } else { 507 None 508 }; 509 510 #[cfg(feature = "guest_debug")] 511 if let Some(debug_path) = debug_path { 512 let target = gdb::GdbStub::new( 513 gdb_sender, 514 gdb_debug_event, 515 gdb_vm_debug_event, 516 gdb_hw_breakpoints, 517 ); 518 thread::Builder::new() 519 .name("gdb".to_owned()) 520 .spawn(move || gdb::gdb_thread(target, &debug_path)) 521 .map_err(Error::GdbThreadSpawn)?; 522 } 523 524 Ok(VmmThreadHandle { 525 thread_handle: thread, 526 #[cfg(feature = "dbus_api")] 527 dbus_shutdown_chs, 528 http_api_handle, 529 }) 530 } 531 532 #[derive(Clone, Deserialize, Serialize)] 533 struct VmMigrationConfig { 534 vm_config: Arc<Mutex<VmConfig>>, 535 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 536 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 537 memory_manager_data: MemoryManagerSnapshotData, 538 } 539 540 #[derive(Debug, Clone)] 541 pub struct VmmVersionInfo { 542 pub build_version: String, 543 pub version: String, 544 } 545 546 impl VmmVersionInfo { 547 pub fn new(build_version: &str, version: &str) -> Self { 548 Self { 549 build_version: build_version.to_owned(), 550 version: version.to_owned(), 551 } 552 } 553 } 554 555 pub struct VmmThreadHandle { 556 pub thread_handle: thread::JoinHandle<Result<()>>, 557 #[cfg(feature = "dbus_api")] 558 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 559 pub http_api_handle: Option<HttpApiHandle>, 560 } 561 562 pub struct Vmm { 563 epoll: EpollContext, 564 exit_evt: EventFd, 565 reset_evt: EventFd, 566 api_evt: EventFd, 567 #[cfg(feature = "guest_debug")] 568 debug_evt: EventFd, 569 #[cfg(feature = "guest_debug")] 570 vm_debug_evt: EventFd, 571 version: VmmVersionInfo, 572 vm: Option<Vm>, 573 vm_config: Option<Arc<Mutex<VmConfig>>>, 574 seccomp_action: SeccompAction, 575 hypervisor: Arc<dyn hypervisor::Hypervisor>, 576 activate_evt: EventFd, 577 signals: Option<Handle>, 578 threads: Vec<thread::JoinHandle<()>>, 579 original_termios_opt: Arc<Mutex<Option<termios>>>, 580 console_resize_pipe: Option<Arc<File>>, 581 console_info: Option<ConsoleInfo>, 582 } 583 584 impl Vmm { 585 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 586 587 fn signal_handler( 588 mut signals: Signals, 589 original_termios_opt: Arc<Mutex<Option<termios>>>, 590 exit_evt: &EventFd, 591 ) { 592 for sig in &Self::HANDLED_SIGNALS { 593 unblock_signal(*sig).unwrap(); 594 } 595 596 for signal in signals.forever() { 597 match signal { 598 SIGTERM | SIGINT => { 599 if exit_evt.write(1).is_err() { 600 // Resetting the terminal is usually done as the VMM exits 601 if let Ok(lock) = original_termios_opt.lock() { 602 if let Some(termios) = *lock { 603 // SAFETY: FFI call 604 let _ = unsafe { 605 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 606 }; 607 } 608 } else { 609 warn!("Failed to lock original termios"); 610 } 611 612 std::process::exit(1); 613 } 614 } 615 _ => (), 616 } 617 } 618 } 619 620 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 621 let signals = Signals::new(Self::HANDLED_SIGNALS); 622 match signals { 623 Ok(signals) => { 624 self.signals = Some(signals.handle()); 625 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 626 let original_termios_opt = Arc::clone(&self.original_termios_opt); 627 628 let signal_handler_seccomp_filter = get_seccomp_filter( 629 &self.seccomp_action, 630 Thread::SignalHandler, 631 self.hypervisor.hypervisor_type(), 632 ) 633 .map_err(Error::CreateSeccompFilter)?; 634 self.threads.push( 635 thread::Builder::new() 636 .name("vmm_signal_handler".to_string()) 637 .spawn(move || { 638 if !signal_handler_seccomp_filter.is_empty() { 639 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 640 .map_err(Error::ApplySeccompFilter) 641 { 642 error!("Error applying seccomp filter: {:?}", e); 643 exit_evt.write(1).ok(); 644 return; 645 } 646 } 647 if landlock_enable{ 648 match Landlock::new() { 649 Ok(landlock) => { 650 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 651 error!("Error applying Landlock to signal handler thread: {:?}", e); 652 exit_evt.write(1).ok(); 653 }); 654 } 655 Err(e) => { 656 error!("Error creating Landlock object: {:?}", e); 657 exit_evt.write(1).ok(); 658 } 659 }; 660 } 661 662 std::panic::catch_unwind(AssertUnwindSafe(|| { 663 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 664 })) 665 .map_err(|_| { 666 error!("vmm signal_handler thread panicked"); 667 exit_evt.write(1).ok() 668 }) 669 .ok(); 670 }) 671 .map_err(Error::SignalHandlerSpawn)?, 672 ); 673 } 674 Err(e) => error!("Signal not found {}", e), 675 } 676 Ok(()) 677 } 678 679 #[allow(clippy::too_many_arguments)] 680 fn new( 681 vmm_version: VmmVersionInfo, 682 api_evt: EventFd, 683 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 684 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 685 seccomp_action: SeccompAction, 686 hypervisor: Arc<dyn hypervisor::Hypervisor>, 687 exit_evt: EventFd, 688 ) -> Result<Self> { 689 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 690 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 691 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 692 693 epoll 694 .add_event(&exit_evt, EpollDispatch::Exit) 695 .map_err(Error::Epoll)?; 696 697 epoll 698 .add_event(&reset_evt, EpollDispatch::Reset) 699 .map_err(Error::Epoll)?; 700 701 epoll 702 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 703 .map_err(Error::Epoll)?; 704 705 epoll 706 .add_event(&api_evt, EpollDispatch::Api) 707 .map_err(Error::Epoll)?; 708 709 #[cfg(feature = "guest_debug")] 710 epoll 711 .add_event(&debug_evt, EpollDispatch::Debug) 712 .map_err(Error::Epoll)?; 713 714 Ok(Vmm { 715 epoll, 716 exit_evt, 717 reset_evt, 718 api_evt, 719 #[cfg(feature = "guest_debug")] 720 debug_evt, 721 #[cfg(feature = "guest_debug")] 722 vm_debug_evt, 723 version: vmm_version, 724 vm: None, 725 vm_config: None, 726 seccomp_action, 727 hypervisor, 728 activate_evt, 729 signals: None, 730 threads: vec![], 731 original_termios_opt: Arc::new(Mutex::new(None)), 732 console_resize_pipe: None, 733 console_info: None, 734 }) 735 } 736 737 fn vm_receive_config<T>( 738 &mut self, 739 req: &Request, 740 socket: &mut T, 741 existing_memory_files: Option<HashMap<u32, File>>, 742 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 743 where 744 T: Read + Write, 745 { 746 // Read in config data along with memory manager data 747 let mut data: Vec<u8> = Vec::new(); 748 data.resize_with(req.length() as usize, Default::default); 749 socket 750 .read_exact(&mut data) 751 .map_err(MigratableError::MigrateSocket)?; 752 753 let vm_migration_config: VmMigrationConfig = 754 serde_json::from_slice(&data).map_err(|e| { 755 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 756 })?; 757 758 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 759 self.vm_check_cpuid_compatibility( 760 &vm_migration_config.vm_config, 761 &vm_migration_config.common_cpuid, 762 )?; 763 764 let config = vm_migration_config.vm_config.clone(); 765 self.vm_config = Some(vm_migration_config.vm_config); 766 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 767 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 768 })?); 769 770 if self 771 .vm_config 772 .as_ref() 773 .unwrap() 774 .lock() 775 .unwrap() 776 .landlock_enable 777 { 778 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 779 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 780 })?; 781 } 782 783 let vm = Vm::create_hypervisor_vm( 784 &self.hypervisor, 785 #[cfg(feature = "tdx")] 786 false, 787 #[cfg(feature = "sev_snp")] 788 false, 789 #[cfg(feature = "sev_snp")] 790 config.lock().unwrap().memory.total_size(), 791 ) 792 .map_err(|e| { 793 MigratableError::MigrateReceive(anyhow!( 794 "Error creating hypervisor VM from snapshot: {:?}", 795 e 796 )) 797 })?; 798 799 let phys_bits = 800 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 801 802 let memory_manager = MemoryManager::new( 803 vm, 804 &config.lock().unwrap().memory.clone(), 805 None, 806 phys_bits, 807 #[cfg(feature = "tdx")] 808 false, 809 Some(&vm_migration_config.memory_manager_data), 810 existing_memory_files, 811 #[cfg(target_arch = "x86_64")] 812 None, 813 ) 814 .map_err(|e| { 815 MigratableError::MigrateReceive(anyhow!( 816 "Error creating MemoryManager from snapshot: {:?}", 817 e 818 )) 819 })?; 820 821 Response::ok().write_to(socket)?; 822 823 Ok(memory_manager) 824 } 825 826 fn vm_receive_state<T>( 827 &mut self, 828 req: &Request, 829 socket: &mut T, 830 mm: Arc<Mutex<MemoryManager>>, 831 ) -> std::result::Result<(), MigratableError> 832 where 833 T: Read + Write, 834 { 835 // Read in state data 836 let mut data: Vec<u8> = Vec::new(); 837 data.resize_with(req.length() as usize, Default::default); 838 socket 839 .read_exact(&mut data) 840 .map_err(MigratableError::MigrateSocket)?; 841 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 842 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 843 })?; 844 845 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 846 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 847 })?; 848 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 849 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 850 })?; 851 #[cfg(feature = "guest_debug")] 852 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 853 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 854 })?; 855 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 856 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 857 })?; 858 859 let timestamp = Instant::now(); 860 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 861 let mut vm = Vm::new_from_memory_manager( 862 self.vm_config.clone().unwrap(), 863 mm, 864 hypervisor_vm, 865 exit_evt, 866 reset_evt, 867 #[cfg(feature = "guest_debug")] 868 debug_evt, 869 &self.seccomp_action, 870 self.hypervisor.clone(), 871 activate_evt, 872 timestamp, 873 self.console_info.clone(), 874 self.console_resize_pipe.clone(), 875 Arc::clone(&self.original_termios_opt), 876 Some(snapshot), 877 ) 878 .map_err(|e| { 879 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 880 })?; 881 882 // Create VM 883 vm.restore().map_err(|e| { 884 Response::error().write_to(socket).ok(); 885 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 886 })?; 887 self.vm = Some(vm); 888 889 Response::ok().write_to(socket)?; 890 891 Ok(()) 892 } 893 894 fn vm_receive_memory<T>( 895 &mut self, 896 req: &Request, 897 socket: &mut T, 898 memory_manager: &mut MemoryManager, 899 ) -> std::result::Result<(), MigratableError> 900 where 901 T: Read + ReadVolatile + Write, 902 { 903 // Read table 904 let table = MemoryRangeTable::read_from(socket, req.length())?; 905 906 // And then read the memory itself 907 memory_manager 908 .receive_memory_regions(&table, socket) 909 .inspect_err(|_| { 910 Response::error().write_to(socket).ok(); 911 })?; 912 Response::ok().write_to(socket)?; 913 Ok(()) 914 } 915 916 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 917 url.strip_prefix("unix:") 918 .ok_or_else(|| { 919 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 920 }) 921 .map(|s| s.into()) 922 } 923 924 // Returns true if there were dirty pages to send 925 fn vm_maybe_send_dirty_pages<T>( 926 vm: &mut Vm, 927 socket: &mut T, 928 ) -> result::Result<bool, MigratableError> 929 where 930 T: Read + Write + WriteVolatile, 931 { 932 // Send (dirty) memory table 933 let table = vm.dirty_log()?; 934 935 // But if there are no regions go straight to pause 936 if table.regions().is_empty() { 937 return Ok(false); 938 } 939 940 Request::memory(table.length()).write_to(socket).unwrap(); 941 table.write_to(socket)?; 942 // And then the memory itself 943 vm.send_memory_regions(&table, socket)?; 944 Response::read_from(socket)?.ok_or_abandon( 945 socket, 946 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 947 )?; 948 949 Ok(true) 950 } 951 952 fn send_migration( 953 vm: &mut Vm, 954 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 955 dyn hypervisor::Hypervisor, 956 >, 957 send_data_migration: VmSendMigrationData, 958 ) -> result::Result<(), MigratableError> { 959 let path = Self::socket_url_to_path(&send_data_migration.destination_url)?; 960 let mut socket = UnixStream::connect(path).map_err(|e| { 961 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 962 })?; 963 964 // Start the migration 965 Request::start().write_to(&mut socket)?; 966 Response::read_from(&mut socket)?.ok_or_abandon( 967 &mut socket, 968 MigratableError::MigrateSend(anyhow!("Error starting migration")), 969 )?; 970 971 // Send config 972 let vm_config = vm.get_config(); 973 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 974 let common_cpuid = { 975 #[cfg(feature = "tdx")] 976 if vm_config.lock().unwrap().is_tdx_enabled() { 977 return Err(MigratableError::MigrateSend(anyhow!( 978 "Live Migration is not supported when TDX is enabled" 979 ))); 980 }; 981 982 let amx = vm_config.lock().unwrap().cpus.features.amx; 983 let phys_bits = 984 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 985 arch::generate_common_cpuid( 986 &hypervisor, 987 &arch::CpuidConfig { 988 sgx_epc_sections: None, 989 phys_bits, 990 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 991 #[cfg(feature = "tdx")] 992 tdx: false, 993 amx, 994 }, 995 ) 996 .map_err(|e| { 997 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 998 })? 999 }; 1000 1001 if send_data_migration.local { 1002 vm.send_memory_fds(&mut socket)?; 1003 } 1004 1005 let vm_migration_config = VmMigrationConfig { 1006 vm_config, 1007 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1008 common_cpuid, 1009 memory_manager_data: vm.memory_manager_data(), 1010 }; 1011 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1012 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1013 socket 1014 .write_all(&config_data) 1015 .map_err(MigratableError::MigrateSocket)?; 1016 Response::read_from(&mut socket)?.ok_or_abandon( 1017 &mut socket, 1018 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1019 )?; 1020 1021 // Let every Migratable object know about the migration being started. 1022 vm.start_migration()?; 1023 1024 if send_data_migration.local { 1025 // Now pause VM 1026 vm.pause()?; 1027 } else { 1028 // Start logging dirty pages 1029 vm.start_dirty_log()?; 1030 1031 // Send memory table 1032 let table = vm.memory_range_table()?; 1033 Request::memory(table.length()) 1034 .write_to(&mut socket) 1035 .unwrap(); 1036 table.write_to(&mut socket)?; 1037 // And then the memory itself 1038 vm.send_memory_regions(&table, &mut socket)?; 1039 Response::read_from(&mut socket)?.ok_or_abandon( 1040 &mut socket, 1041 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1042 )?; 1043 1044 // Try at most 5 passes of dirty memory sending 1045 const MAX_DIRTY_MIGRATIONS: usize = 5; 1046 for i in 0..MAX_DIRTY_MIGRATIONS { 1047 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1048 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1049 break; 1050 } 1051 } 1052 1053 // Now pause VM 1054 vm.pause()?; 1055 1056 // Send last batch of dirty pages 1057 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1058 1059 // Stop logging dirty pages 1060 vm.stop_dirty_log()?; 1061 } 1062 // Capture snapshot and send it 1063 let vm_snapshot = vm.snapshot()?; 1064 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1065 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1066 socket 1067 .write_all(&snapshot_data) 1068 .map_err(MigratableError::MigrateSocket)?; 1069 Response::read_from(&mut socket)?.ok_or_abandon( 1070 &mut socket, 1071 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1072 )?; 1073 // Complete the migration 1074 Request::complete().write_to(&mut socket)?; 1075 Response::read_from(&mut socket)?.ok_or_abandon( 1076 &mut socket, 1077 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1078 )?; 1079 1080 info!("Migration complete"); 1081 1082 // Let every Migratable object know about the migration being complete 1083 vm.complete_migration() 1084 } 1085 1086 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1087 fn vm_check_cpuid_compatibility( 1088 &self, 1089 src_vm_config: &Arc<Mutex<VmConfig>>, 1090 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1091 ) -> result::Result<(), MigratableError> { 1092 #[cfg(feature = "tdx")] 1093 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1094 return Err(MigratableError::MigrateReceive(anyhow!( 1095 "Live Migration is not supported when TDX is enabled" 1096 ))); 1097 }; 1098 1099 // We check the `CPUID` compatibility of between the source vm and destination, which is 1100 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1101 let dest_cpuid = &{ 1102 let vm_config = &src_vm_config.lock().unwrap(); 1103 1104 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1105 arch::generate_common_cpuid( 1106 &self.hypervisor.clone(), 1107 &arch::CpuidConfig { 1108 sgx_epc_sections: None, 1109 phys_bits, 1110 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1111 #[cfg(feature = "tdx")] 1112 tdx: false, 1113 amx: vm_config.cpus.features.amx, 1114 }, 1115 ) 1116 .map_err(|e| { 1117 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1118 })? 1119 }; 1120 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1121 MigratableError::MigrateReceive(anyhow!( 1122 "Error checking cpu feature compatibility': {:?}", 1123 e 1124 )) 1125 }) 1126 } 1127 1128 fn control_loop( 1129 &mut self, 1130 api_receiver: Rc<Receiver<ApiRequest>>, 1131 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1132 ) -> Result<()> { 1133 const EPOLL_EVENTS_LEN: usize = 100; 1134 1135 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1136 let epoll_fd = self.epoll.as_raw_fd(); 1137 1138 'outer: loop { 1139 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1140 Ok(res) => res, 1141 Err(e) => { 1142 if e.kind() == io::ErrorKind::Interrupted { 1143 // It's well defined from the epoll_wait() syscall 1144 // documentation that the epoll loop can be interrupted 1145 // before any of the requested events occurred or the 1146 // timeout expired. In both those cases, epoll_wait() 1147 // returns an error of type EINTR, but this should not 1148 // be considered as a regular error. Instead it is more 1149 // appropriate to retry, by calling into epoll_wait(). 1150 continue; 1151 } 1152 return Err(Error::Epoll(e)); 1153 } 1154 }; 1155 1156 for event in events.iter().take(num_events) { 1157 let dispatch_event: EpollDispatch = event.data.into(); 1158 match dispatch_event { 1159 EpollDispatch::Unknown => { 1160 let event = event.data; 1161 warn!("Unknown VMM loop event: {}", event); 1162 } 1163 EpollDispatch::Exit => { 1164 info!("VM exit event"); 1165 // Consume the event. 1166 self.exit_evt.read().map_err(Error::EventFdRead)?; 1167 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1168 1169 break 'outer; 1170 } 1171 EpollDispatch::Reset => { 1172 info!("VM reset event"); 1173 // Consume the event. 1174 self.reset_evt.read().map_err(Error::EventFdRead)?; 1175 self.vm_reboot().map_err(Error::VmReboot)?; 1176 } 1177 EpollDispatch::ActivateVirtioDevices => { 1178 if let Some(ref vm) = self.vm { 1179 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1180 info!( 1181 "Trying to activate pending virtio devices: count = {}", 1182 count 1183 ); 1184 vm.activate_virtio_devices() 1185 .map_err(Error::ActivateVirtioDevices)?; 1186 } 1187 } 1188 EpollDispatch::Api => { 1189 // Consume the events. 1190 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1191 // Read from the API receiver channel 1192 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1193 1194 if api_request(self)? { 1195 break 'outer; 1196 } 1197 } 1198 } 1199 #[cfg(feature = "guest_debug")] 1200 EpollDispatch::Debug => { 1201 // Consume the events. 1202 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1203 // Read from the API receiver channel 1204 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1205 1206 let response = if let Some(ref mut vm) = self.vm { 1207 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1208 } else { 1209 Err(VmError::VmNotRunning) 1210 } 1211 .map_err(gdb::Error::Vm); 1212 1213 gdb_request 1214 .sender 1215 .send(response) 1216 .map_err(Error::GdbResponseSend)?; 1217 } 1218 } 1219 #[cfg(not(feature = "guest_debug"))] 1220 EpollDispatch::Debug => {} 1221 } 1222 } 1223 } 1224 1225 // Trigger the termination of the signal_handler thread 1226 if let Some(signals) = self.signals.take() { 1227 signals.close(); 1228 } 1229 1230 // Wait for all the threads to finish 1231 for thread in self.threads.drain(..) { 1232 thread.join().map_err(Error::ThreadCleanup)? 1233 } 1234 1235 Ok(()) 1236 } 1237 } 1238 1239 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1240 vm_config.lock().unwrap().apply_landlock()?; 1241 Ok(()) 1242 } 1243 1244 impl RequestHandler for Vmm { 1245 fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> { 1246 // We only store the passed VM config. 1247 // The VM will be created when being asked to boot it. 1248 if self.vm_config.is_none() { 1249 self.vm_config = Some(Arc::new(Mutex::new(*config))); 1250 self.console_info = 1251 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1252 1253 if self 1254 .vm_config 1255 .as_ref() 1256 .unwrap() 1257 .lock() 1258 .unwrap() 1259 .landlock_enable 1260 { 1261 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1262 .map_err(VmError::ApplyLandlock)?; 1263 } 1264 Ok(()) 1265 } else { 1266 Err(VmError::VmAlreadyCreated) 1267 } 1268 } 1269 1270 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1271 tracer::start(); 1272 info!("Booting VM"); 1273 event!("vm", "booting"); 1274 let r = { 1275 trace_scoped!("vm_boot"); 1276 // If we don't have a config, we cannot boot a VM. 1277 if self.vm_config.is_none() { 1278 return Err(VmError::VmMissingConfig); 1279 }; 1280 1281 // console_info is set to None in vm_shutdown. re-populate here if empty 1282 if self.console_info.is_none() { 1283 self.console_info = 1284 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1285 } 1286 1287 // Create a new VM if we don't have one yet. 1288 if self.vm.is_none() { 1289 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1290 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1291 #[cfg(feature = "guest_debug")] 1292 let vm_debug_evt = self 1293 .vm_debug_evt 1294 .try_clone() 1295 .map_err(VmError::EventFdClone)?; 1296 let activate_evt = self 1297 .activate_evt 1298 .try_clone() 1299 .map_err(VmError::EventFdClone)?; 1300 1301 if let Some(ref vm_config) = self.vm_config { 1302 let vm = Vm::new( 1303 Arc::clone(vm_config), 1304 exit_evt, 1305 reset_evt, 1306 #[cfg(feature = "guest_debug")] 1307 vm_debug_evt, 1308 &self.seccomp_action, 1309 self.hypervisor.clone(), 1310 activate_evt, 1311 self.console_info.clone(), 1312 self.console_resize_pipe.clone(), 1313 Arc::clone(&self.original_termios_opt), 1314 None, 1315 None, 1316 None, 1317 )?; 1318 1319 self.vm = Some(vm); 1320 } 1321 } 1322 1323 // Now we can boot the VM. 1324 if let Some(ref mut vm) = self.vm { 1325 vm.boot() 1326 } else { 1327 Err(VmError::VmNotCreated) 1328 } 1329 }; 1330 tracer::end(); 1331 if r.is_ok() { 1332 event!("vm", "booted"); 1333 } 1334 r 1335 } 1336 1337 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1338 if let Some(ref mut vm) = self.vm { 1339 vm.pause().map_err(VmError::Pause) 1340 } else { 1341 Err(VmError::VmNotRunning) 1342 } 1343 } 1344 1345 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1346 if let Some(ref mut vm) = self.vm { 1347 vm.resume().map_err(VmError::Resume) 1348 } else { 1349 Err(VmError::VmNotRunning) 1350 } 1351 } 1352 1353 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1354 if let Some(ref mut vm) = self.vm { 1355 // Drain console_info so that FDs are not reused 1356 let _ = self.console_info.take(); 1357 vm.snapshot() 1358 .map_err(VmError::Snapshot) 1359 .and_then(|snapshot| { 1360 vm.send(&snapshot, destination_url) 1361 .map_err(VmError::SnapshotSend) 1362 }) 1363 } else { 1364 Err(VmError::VmNotRunning) 1365 } 1366 } 1367 1368 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1369 if self.vm.is_some() || self.vm_config.is_some() { 1370 return Err(VmError::VmAlreadyCreated); 1371 } 1372 1373 let source_url = restore_cfg.source_url.as_path().to_str(); 1374 if source_url.is_none() { 1375 return Err(VmError::InvalidRestoreSourceUrl); 1376 } 1377 // Safe to unwrap as we checked it was Some(&str). 1378 let source_url = source_url.unwrap(); 1379 1380 let vm_config = Arc::new(Mutex::new( 1381 recv_vm_config(source_url).map_err(VmError::Restore)?, 1382 )); 1383 restore_cfg 1384 .validate(&vm_config.lock().unwrap().clone()) 1385 .map_err(VmError::ConfigValidation)?; 1386 1387 // Update VM's net configurations with new fds received for restore operation 1388 if let (Some(restored_nets), Some(vm_net_configs)) = 1389 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1390 { 1391 for net in restored_nets.iter() { 1392 for net_config in vm_net_configs.iter_mut() { 1393 // update only if the net dev is backed by FDs 1394 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1395 net_config.fds.clone_from(&net.fds); 1396 } 1397 } 1398 } 1399 } 1400 1401 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1402 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1403 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1404 1405 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1406 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1407 .map_err(VmError::Restore)?; 1408 1409 self.vm_config = Some(Arc::clone(&vm_config)); 1410 1411 // console_info is set to None in vm_snapshot. re-populate here if empty 1412 if self.console_info.is_none() { 1413 self.console_info = 1414 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1415 } 1416 1417 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1418 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1419 #[cfg(feature = "guest_debug")] 1420 let debug_evt = self 1421 .vm_debug_evt 1422 .try_clone() 1423 .map_err(VmError::EventFdClone)?; 1424 let activate_evt = self 1425 .activate_evt 1426 .try_clone() 1427 .map_err(VmError::EventFdClone)?; 1428 1429 let vm = Vm::new( 1430 vm_config, 1431 exit_evt, 1432 reset_evt, 1433 #[cfg(feature = "guest_debug")] 1434 debug_evt, 1435 &self.seccomp_action, 1436 self.hypervisor.clone(), 1437 activate_evt, 1438 self.console_info.clone(), 1439 self.console_resize_pipe.clone(), 1440 Arc::clone(&self.original_termios_opt), 1441 Some(snapshot), 1442 Some(source_url), 1443 Some(restore_cfg.prefault), 1444 )?; 1445 self.vm = Some(vm); 1446 1447 if self 1448 .vm_config 1449 .as_ref() 1450 .unwrap() 1451 .lock() 1452 .unwrap() 1453 .landlock_enable 1454 { 1455 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1456 .map_err(VmError::ApplyLandlock)?; 1457 } 1458 1459 // Now we can restore the rest of the VM. 1460 if let Some(ref mut vm) = self.vm { 1461 vm.restore() 1462 } else { 1463 Err(VmError::VmNotCreated) 1464 } 1465 } 1466 1467 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1468 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1469 if let Some(ref mut vm) = self.vm { 1470 vm.coredump(destination_url).map_err(VmError::Coredump) 1471 } else { 1472 Err(VmError::VmNotRunning) 1473 } 1474 } 1475 1476 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1477 let r = if let Some(ref mut vm) = self.vm.take() { 1478 // Drain console_info so that the FDs are not reused 1479 let _ = self.console_info.take(); 1480 vm.shutdown() 1481 } else { 1482 Err(VmError::VmNotRunning) 1483 }; 1484 1485 if r.is_ok() { 1486 event!("vm", "shutdown"); 1487 } 1488 1489 r 1490 } 1491 1492 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1493 event!("vm", "rebooting"); 1494 1495 // First we stop the current VM 1496 let config = if let Some(mut vm) = self.vm.take() { 1497 let config = vm.get_config(); 1498 vm.shutdown()?; 1499 config 1500 } else { 1501 return Err(VmError::VmNotCreated); 1502 }; 1503 1504 // vm.shutdown() closes all the console devices, so set console_info to None 1505 // so that the closed FD #s are not reused. 1506 let _ = self.console_info.take(); 1507 1508 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1509 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1510 #[cfg(feature = "guest_debug")] 1511 let debug_evt = self 1512 .vm_debug_evt 1513 .try_clone() 1514 .map_err(VmError::EventFdClone)?; 1515 let activate_evt = self 1516 .activate_evt 1517 .try_clone() 1518 .map_err(VmError::EventFdClone)?; 1519 1520 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1521 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1522 // during the boot process. 1523 if self.reset_evt.read().is_ok() { 1524 warn!("Spurious second reset event received. Ignoring."); 1525 } 1526 1527 self.console_info = 1528 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1529 1530 // Then we create the new VM 1531 let mut vm = Vm::new( 1532 config, 1533 exit_evt, 1534 reset_evt, 1535 #[cfg(feature = "guest_debug")] 1536 debug_evt, 1537 &self.seccomp_action, 1538 self.hypervisor.clone(), 1539 activate_evt, 1540 self.console_info.clone(), 1541 self.console_resize_pipe.clone(), 1542 Arc::clone(&self.original_termios_opt), 1543 None, 1544 None, 1545 None, 1546 )?; 1547 1548 // And we boot it 1549 vm.boot()?; 1550 1551 self.vm = Some(vm); 1552 1553 event!("vm", "rebooted"); 1554 1555 Ok(()) 1556 } 1557 1558 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1559 match &self.vm_config { 1560 Some(vm_config) => { 1561 let state = match &self.vm { 1562 Some(vm) => vm.get_state()?, 1563 None => VmState::Created, 1564 }; 1565 let config = vm_config.lock().unwrap().clone(); 1566 1567 let mut memory_actual_size = config.memory.total_size(); 1568 if let Some(vm) = &self.vm { 1569 memory_actual_size -= vm.balloon_size(); 1570 } 1571 1572 let device_tree = self 1573 .vm 1574 .as_ref() 1575 .map(|vm| vm.device_tree().lock().unwrap().clone()); 1576 1577 Ok(VmInfoResponse { 1578 config: Box::new(config), 1579 state, 1580 memory_actual_size, 1581 device_tree, 1582 }) 1583 } 1584 None => Err(VmError::VmNotCreated), 1585 } 1586 } 1587 1588 fn vmm_ping(&self) -> VmmPingResponse { 1589 let VmmVersionInfo { 1590 build_version, 1591 version, 1592 } = self.version.clone(); 1593 1594 VmmPingResponse { 1595 build_version, 1596 version, 1597 pid: std::process::id() as i64, 1598 features: feature_list(), 1599 } 1600 } 1601 1602 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1603 if self.vm_config.is_none() { 1604 return Ok(()); 1605 } 1606 1607 // If a VM is booted, we first try to shut it down. 1608 if self.vm.is_some() { 1609 self.vm_shutdown()?; 1610 } 1611 1612 self.vm_config = None; 1613 1614 event!("vm", "deleted"); 1615 1616 Ok(()) 1617 } 1618 1619 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1620 self.vm_delete()?; 1621 event!("vmm", "shutdown"); 1622 Ok(()) 1623 } 1624 1625 fn vm_resize( 1626 &mut self, 1627 desired_vcpus: Option<u8>, 1628 desired_ram: Option<u64>, 1629 desired_balloon: Option<u64>, 1630 ) -> result::Result<(), VmError> { 1631 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1632 1633 if let Some(ref mut vm) = self.vm { 1634 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1635 error!("Error when resizing VM: {:?}", e); 1636 Err(e) 1637 } else { 1638 Ok(()) 1639 } 1640 } else { 1641 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1642 if let Some(desired_vcpus) = desired_vcpus { 1643 config.cpus.boot_vcpus = desired_vcpus; 1644 } 1645 if let Some(desired_ram) = desired_ram { 1646 config.memory.size = desired_ram; 1647 } 1648 if let Some(desired_balloon) = desired_balloon { 1649 if let Some(balloon_config) = &mut config.balloon { 1650 balloon_config.size = desired_balloon; 1651 } 1652 } 1653 Ok(()) 1654 } 1655 } 1656 1657 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1658 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1659 1660 if let Some(ref mut vm) = self.vm { 1661 if let Err(e) = vm.resize_zone(id, desired_ram) { 1662 error!("Error when resizing VM: {:?}", e); 1663 Err(e) 1664 } else { 1665 Ok(()) 1666 } 1667 } else { 1668 // Update VmConfig by setting the new desired ram. 1669 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1670 1671 if let Some(zones) = &mut memory_config.zones { 1672 for zone in zones.iter_mut() { 1673 if zone.id == id { 1674 zone.size = desired_ram; 1675 return Ok(()); 1676 } 1677 } 1678 } 1679 1680 error!("Could not find the memory zone {} for the resize", id); 1681 Err(VmError::ResizeZone) 1682 } 1683 } 1684 1685 fn vm_add_device( 1686 &mut self, 1687 device_cfg: DeviceConfig, 1688 ) -> result::Result<Option<Vec<u8>>, VmError> { 1689 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1690 1691 { 1692 // Validate the configuration change in a cloned configuration 1693 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1694 add_to_config(&mut config.devices, device_cfg.clone()); 1695 config.validate().map_err(VmError::ConfigValidation)?; 1696 } 1697 1698 if let Some(ref mut vm) = self.vm { 1699 let info = vm.add_device(device_cfg).map_err(|e| { 1700 error!("Error when adding new device to the VM: {:?}", e); 1701 e 1702 })?; 1703 serde_json::to_vec(&info) 1704 .map(Some) 1705 .map_err(VmError::SerializeJson) 1706 } else { 1707 // Update VmConfig by adding the new device. 1708 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1709 add_to_config(&mut config.devices, device_cfg); 1710 Ok(None) 1711 } 1712 } 1713 1714 fn vm_add_user_device( 1715 &mut self, 1716 device_cfg: UserDeviceConfig, 1717 ) -> result::Result<Option<Vec<u8>>, VmError> { 1718 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1719 1720 { 1721 // Validate the configuration change in a cloned configuration 1722 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1723 add_to_config(&mut config.user_devices, device_cfg.clone()); 1724 config.validate().map_err(VmError::ConfigValidation)?; 1725 } 1726 1727 if let Some(ref mut vm) = self.vm { 1728 let info = vm.add_user_device(device_cfg).map_err(|e| { 1729 error!("Error when adding new user device to the VM: {:?}", e); 1730 e 1731 })?; 1732 serde_json::to_vec(&info) 1733 .map(Some) 1734 .map_err(VmError::SerializeJson) 1735 } else { 1736 // Update VmConfig by adding the new device. 1737 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1738 add_to_config(&mut config.user_devices, device_cfg); 1739 Ok(None) 1740 } 1741 } 1742 1743 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1744 if let Some(ref mut vm) = self.vm { 1745 if let Err(e) = vm.remove_device(id) { 1746 error!("Error when removing device from the VM: {:?}", e); 1747 Err(e) 1748 } else { 1749 Ok(()) 1750 } 1751 } else if let Some(ref config) = self.vm_config { 1752 let mut config = config.lock().unwrap(); 1753 if config.remove_device(&id) { 1754 Ok(()) 1755 } else { 1756 Err(VmError::NoDeviceToRemove(id)) 1757 } 1758 } else { 1759 Err(VmError::VmNotCreated) 1760 } 1761 } 1762 1763 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1764 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1765 1766 { 1767 // Validate the configuration change in a cloned configuration 1768 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1769 add_to_config(&mut config.disks, disk_cfg.clone()); 1770 config.validate().map_err(VmError::ConfigValidation)?; 1771 } 1772 1773 if let Some(ref mut vm) = self.vm { 1774 let info = vm.add_disk(disk_cfg).map_err(|e| { 1775 error!("Error when adding new disk to the VM: {:?}", e); 1776 e 1777 })?; 1778 serde_json::to_vec(&info) 1779 .map(Some) 1780 .map_err(VmError::SerializeJson) 1781 } else { 1782 // Update VmConfig by adding the new device. 1783 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1784 add_to_config(&mut config.disks, disk_cfg); 1785 Ok(None) 1786 } 1787 } 1788 1789 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1790 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1791 1792 { 1793 // Validate the configuration change in a cloned configuration 1794 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1795 add_to_config(&mut config.fs, fs_cfg.clone()); 1796 config.validate().map_err(VmError::ConfigValidation)?; 1797 } 1798 1799 if let Some(ref mut vm) = self.vm { 1800 let info = vm.add_fs(fs_cfg).map_err(|e| { 1801 error!("Error when adding new fs to the VM: {:?}", e); 1802 e 1803 })?; 1804 serde_json::to_vec(&info) 1805 .map(Some) 1806 .map_err(VmError::SerializeJson) 1807 } else { 1808 // Update VmConfig by adding the new device. 1809 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1810 add_to_config(&mut config.fs, fs_cfg); 1811 Ok(None) 1812 } 1813 } 1814 1815 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1816 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1817 1818 { 1819 // Validate the configuration change in a cloned configuration 1820 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1821 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1822 config.validate().map_err(VmError::ConfigValidation)?; 1823 } 1824 1825 if let Some(ref mut vm) = self.vm { 1826 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1827 error!("Error when adding new pmem device to the VM: {:?}", e); 1828 e 1829 })?; 1830 serde_json::to_vec(&info) 1831 .map(Some) 1832 .map_err(VmError::SerializeJson) 1833 } else { 1834 // Update VmConfig by adding the new device. 1835 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1836 add_to_config(&mut config.pmem, pmem_cfg); 1837 Ok(None) 1838 } 1839 } 1840 1841 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1842 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1843 1844 { 1845 // Validate the configuration change in a cloned configuration 1846 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1847 add_to_config(&mut config.net, net_cfg.clone()); 1848 config.validate().map_err(VmError::ConfigValidation)?; 1849 } 1850 1851 if let Some(ref mut vm) = self.vm { 1852 let info = vm.add_net(net_cfg).map_err(|e| { 1853 error!("Error when adding new network device to the VM: {:?}", e); 1854 e 1855 })?; 1856 serde_json::to_vec(&info) 1857 .map(Some) 1858 .map_err(VmError::SerializeJson) 1859 } else { 1860 // Update VmConfig by adding the new device. 1861 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1862 add_to_config(&mut config.net, net_cfg); 1863 Ok(None) 1864 } 1865 } 1866 1867 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1868 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1869 1870 { 1871 // Validate the configuration change in a cloned configuration 1872 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1873 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 1874 config.validate().map_err(VmError::ConfigValidation)?; 1875 } 1876 1877 if let Some(ref mut vm) = self.vm { 1878 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 1879 error!("Error when adding new vDPA device to the VM: {:?}", e); 1880 e 1881 })?; 1882 serde_json::to_vec(&info) 1883 .map(Some) 1884 .map_err(VmError::SerializeJson) 1885 } else { 1886 // Update VmConfig by adding the new device. 1887 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1888 add_to_config(&mut config.vdpa, vdpa_cfg); 1889 Ok(None) 1890 } 1891 } 1892 1893 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1894 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1895 1896 { 1897 // Validate the configuration change in a cloned configuration 1898 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1899 1900 if config.vsock.is_some() { 1901 return Err(VmError::TooManyVsockDevices); 1902 } 1903 1904 config.vsock = Some(vsock_cfg.clone()); 1905 config.validate().map_err(VmError::ConfigValidation)?; 1906 } 1907 1908 if let Some(ref mut vm) = self.vm { 1909 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 1910 error!("Error when adding new vsock device to the VM: {:?}", e); 1911 e 1912 })?; 1913 serde_json::to_vec(&info) 1914 .map(Some) 1915 .map_err(VmError::SerializeJson) 1916 } else { 1917 // Update VmConfig by adding the new device. 1918 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1919 config.vsock = Some(vsock_cfg); 1920 Ok(None) 1921 } 1922 } 1923 1924 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 1925 if let Some(ref mut vm) = self.vm { 1926 let info = vm.counters().map_err(|e| { 1927 error!("Error when getting counters from the VM: {:?}", e); 1928 e 1929 })?; 1930 serde_json::to_vec(&info) 1931 .map(Some) 1932 .map_err(VmError::SerializeJson) 1933 } else { 1934 Err(VmError::VmNotRunning) 1935 } 1936 } 1937 1938 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 1939 if let Some(ref mut vm) = self.vm { 1940 vm.power_button() 1941 } else { 1942 Err(VmError::VmNotRunning) 1943 } 1944 } 1945 1946 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 1947 if let Some(ref mut vm) = self.vm { 1948 vm.nmi() 1949 } else { 1950 Err(VmError::VmNotRunning) 1951 } 1952 } 1953 1954 fn vm_receive_migration( 1955 &mut self, 1956 receive_data_migration: VmReceiveMigrationData, 1957 ) -> result::Result<(), MigratableError> { 1958 info!( 1959 "Receiving migration: receiver_url = {}", 1960 receive_data_migration.receiver_url 1961 ); 1962 1963 let path = Self::socket_url_to_path(&receive_data_migration.receiver_url)?; 1964 let listener = UnixListener::bind(&path).map_err(|e| { 1965 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1966 })?; 1967 let (mut socket, _addr) = listener.accept().map_err(|e| { 1968 MigratableError::MigrateReceive(anyhow!("Error accepting on UNIX socket: {}", e)) 1969 })?; 1970 std::fs::remove_file(&path).map_err(|e| { 1971 MigratableError::MigrateReceive(anyhow!("Error unlinking UNIX socket: {}", e)) 1972 })?; 1973 1974 let mut started = false; 1975 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 1976 let mut existing_memory_files = None; 1977 loop { 1978 let req = Request::read_from(&mut socket)?; 1979 match req.command() { 1980 Command::Invalid => info!("Invalid Command Received"), 1981 Command::Start => { 1982 info!("Start Command Received"); 1983 started = true; 1984 1985 Response::ok().write_to(&mut socket)?; 1986 } 1987 Command::Config => { 1988 info!("Config Command Received"); 1989 1990 if !started { 1991 warn!("Migration not started yet"); 1992 Response::error().write_to(&mut socket)?; 1993 continue; 1994 } 1995 memory_manager = Some(self.vm_receive_config( 1996 &req, 1997 &mut socket, 1998 existing_memory_files.take(), 1999 )?); 2000 } 2001 Command::State => { 2002 info!("State Command Received"); 2003 2004 if !started { 2005 warn!("Migration not started yet"); 2006 Response::error().write_to(&mut socket)?; 2007 continue; 2008 } 2009 if let Some(mm) = memory_manager.take() { 2010 self.vm_receive_state(&req, &mut socket, mm)?; 2011 } else { 2012 warn!("Configuration not sent yet"); 2013 Response::error().write_to(&mut socket)?; 2014 } 2015 } 2016 Command::Memory => { 2017 info!("Memory Command Received"); 2018 2019 if !started { 2020 warn!("Migration not started yet"); 2021 Response::error().write_to(&mut socket)?; 2022 continue; 2023 } 2024 if let Some(mm) = memory_manager.as_ref() { 2025 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2026 } else { 2027 warn!("Configuration not sent yet"); 2028 Response::error().write_to(&mut socket)?; 2029 } 2030 } 2031 Command::MemoryFd => { 2032 info!("MemoryFd Command Received"); 2033 2034 if !started { 2035 warn!("Migration not started yet"); 2036 Response::error().write_to(&mut socket)?; 2037 continue; 2038 } 2039 2040 let mut buf = [0u8; 4]; 2041 let (_, file) = socket.recv_with_fd(&mut buf).map_err(|e| { 2042 MigratableError::MigrateReceive(anyhow!( 2043 "Error receiving slot from socket: {}", 2044 e 2045 )) 2046 })?; 2047 2048 if existing_memory_files.is_none() { 2049 existing_memory_files = Some(HashMap::default()) 2050 } 2051 2052 if let Some(ref mut existing_memory_files) = existing_memory_files { 2053 let slot = u32::from_le_bytes(buf); 2054 existing_memory_files.insert(slot, file.unwrap()); 2055 } 2056 2057 Response::ok().write_to(&mut socket)?; 2058 } 2059 Command::Complete => { 2060 info!("Complete Command Received"); 2061 if let Some(ref mut vm) = self.vm.as_mut() { 2062 vm.resume()?; 2063 Response::ok().write_to(&mut socket)?; 2064 } else { 2065 warn!("VM not created yet"); 2066 Response::error().write_to(&mut socket)?; 2067 } 2068 break; 2069 } 2070 Command::Abandon => { 2071 info!("Abandon Command Received"); 2072 self.vm = None; 2073 self.vm_config = None; 2074 Response::ok().write_to(&mut socket).ok(); 2075 break; 2076 } 2077 } 2078 } 2079 2080 Ok(()) 2081 } 2082 2083 fn vm_send_migration( 2084 &mut self, 2085 send_data_migration: VmSendMigrationData, 2086 ) -> result::Result<(), MigratableError> { 2087 info!( 2088 "Sending migration: destination_url = {}, local = {}", 2089 send_data_migration.destination_url, send_data_migration.local 2090 ); 2091 2092 if !self 2093 .vm_config 2094 .as_ref() 2095 .unwrap() 2096 .lock() 2097 .unwrap() 2098 .backed_by_shared_memory() 2099 && send_data_migration.local 2100 { 2101 return Err(MigratableError::MigrateSend(anyhow!( 2102 "Local migration requires shared memory or hugepages enabled" 2103 ))); 2104 } 2105 2106 if let Some(vm) = self.vm.as_mut() { 2107 Self::send_migration( 2108 vm, 2109 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2110 self.hypervisor.clone(), 2111 send_data_migration, 2112 ) 2113 .map_err(|migration_err| { 2114 error!("Migration failed: {:?}", migration_err); 2115 2116 // Stop logging dirty pages 2117 if let Err(e) = vm.stop_dirty_log() { 2118 return e; 2119 } 2120 2121 if vm.get_state().unwrap() == VmState::Paused { 2122 if let Err(e) = vm.resume() { 2123 return e; 2124 } 2125 } 2126 2127 migration_err 2128 })?; 2129 2130 // Shutdown the VM after the migration succeeded 2131 self.exit_evt.write(1).map_err(|e| { 2132 MigratableError::MigrateSend(anyhow!( 2133 "Failed shutting down the VM after migration: {:?}", 2134 e 2135 )) 2136 }) 2137 } else { 2138 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2139 } 2140 } 2141 } 2142 2143 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2144 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2145 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2146 2147 #[cfg(test)] 2148 mod unit_tests { 2149 use super::*; 2150 #[cfg(target_arch = "x86_64")] 2151 use crate::vm_config::DebugConsoleConfig; 2152 use crate::vm_config::{ 2153 ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig, 2154 PayloadConfig, RngConfig, 2155 }; 2156 2157 fn create_dummy_vmm() -> Vmm { 2158 Vmm::new( 2159 VmmVersionInfo::new("dummy", "dummy"), 2160 EventFd::new(EFD_NONBLOCK).unwrap(), 2161 #[cfg(feature = "guest_debug")] 2162 EventFd::new(EFD_NONBLOCK).unwrap(), 2163 #[cfg(feature = "guest_debug")] 2164 EventFd::new(EFD_NONBLOCK).unwrap(), 2165 SeccompAction::Allow, 2166 hypervisor::new().unwrap(), 2167 EventFd::new(EFD_NONBLOCK).unwrap(), 2168 ) 2169 .unwrap() 2170 } 2171 2172 fn create_dummy_vm_config() -> Box<VmConfig> { 2173 Box::new(VmConfig { 2174 cpus: CpusConfig { 2175 boot_vcpus: 1, 2176 max_vcpus: 1, 2177 topology: None, 2178 kvm_hyperv: false, 2179 max_phys_bits: 46, 2180 affinity: None, 2181 features: CpuFeatures::default(), 2182 }, 2183 memory: MemoryConfig { 2184 size: 536_870_912, 2185 mergeable: false, 2186 hotplug_method: HotplugMethod::Acpi, 2187 hotplug_size: None, 2188 hotplugged_size: None, 2189 shared: true, 2190 hugepages: false, 2191 hugepage_size: None, 2192 prefault: false, 2193 zones: None, 2194 thp: true, 2195 }, 2196 payload: Some(PayloadConfig { 2197 kernel: Some(PathBuf::from("/path/to/kernel")), 2198 firmware: None, 2199 cmdline: None, 2200 initramfs: None, 2201 #[cfg(feature = "igvm")] 2202 igvm: None, 2203 #[cfg(feature = "sev_snp")] 2204 host_data: None, 2205 }), 2206 rate_limit_groups: None, 2207 disks: None, 2208 net: None, 2209 rng: RngConfig { 2210 src: PathBuf::from("/dev/urandom"), 2211 iommu: false, 2212 }, 2213 balloon: None, 2214 fs: None, 2215 pmem: None, 2216 serial: ConsoleConfig { 2217 file: None, 2218 mode: ConsoleOutputMode::Null, 2219 iommu: false, 2220 socket: None, 2221 }, 2222 console: ConsoleConfig { 2223 file: None, 2224 mode: ConsoleOutputMode::Tty, 2225 iommu: false, 2226 socket: None, 2227 }, 2228 #[cfg(target_arch = "x86_64")] 2229 debug_console: DebugConsoleConfig::default(), 2230 devices: None, 2231 user_devices: None, 2232 vdpa: None, 2233 vsock: None, 2234 #[cfg(feature = "pvmemcontrol")] 2235 pvmemcontrol: None, 2236 pvpanic: false, 2237 iommu: false, 2238 #[cfg(target_arch = "x86_64")] 2239 sgx_epc: None, 2240 numa: None, 2241 watchdog: false, 2242 #[cfg(feature = "guest_debug")] 2243 gdb: false, 2244 pci_segments: None, 2245 platform: None, 2246 tpm: None, 2247 preserved_fds: None, 2248 landlock_enable: false, 2249 landlock_rules: None, 2250 }) 2251 } 2252 2253 #[test] 2254 fn test_vmm_vm_create() { 2255 let mut vmm = create_dummy_vmm(); 2256 let config = create_dummy_vm_config(); 2257 2258 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2259 assert!(matches!( 2260 vmm.vm_create(config), 2261 Err(VmError::VmAlreadyCreated) 2262 )); 2263 } 2264 2265 #[test] 2266 fn test_vmm_vm_cold_add_device() { 2267 let mut vmm = create_dummy_vmm(); 2268 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2269 2270 assert!(matches!( 2271 vmm.vm_add_device(device_config.clone()), 2272 Err(VmError::VmNotCreated) 2273 )); 2274 2275 let _ = vmm.vm_create(create_dummy_vm_config()); 2276 assert!(vmm 2277 .vm_config 2278 .as_ref() 2279 .unwrap() 2280 .lock() 2281 .unwrap() 2282 .devices 2283 .is_none()); 2284 2285 assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none()); 2286 assert_eq!( 2287 vmm.vm_config 2288 .as_ref() 2289 .unwrap() 2290 .lock() 2291 .unwrap() 2292 .devices 2293 .clone() 2294 .unwrap() 2295 .len(), 2296 1 2297 ); 2298 assert_eq!( 2299 vmm.vm_config 2300 .as_ref() 2301 .unwrap() 2302 .lock() 2303 .unwrap() 2304 .devices 2305 .clone() 2306 .unwrap()[0], 2307 device_config 2308 ); 2309 } 2310 2311 #[test] 2312 fn test_vmm_vm_cold_add_user_device() { 2313 let mut vmm = create_dummy_vmm(); 2314 let user_device_config = 2315 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2316 2317 assert!(matches!( 2318 vmm.vm_add_user_device(user_device_config.clone()), 2319 Err(VmError::VmNotCreated) 2320 )); 2321 2322 let _ = vmm.vm_create(create_dummy_vm_config()); 2323 assert!(vmm 2324 .vm_config 2325 .as_ref() 2326 .unwrap() 2327 .lock() 2328 .unwrap() 2329 .user_devices 2330 .is_none()); 2331 2332 assert!(vmm 2333 .vm_add_user_device(user_device_config.clone()) 2334 .unwrap() 2335 .is_none()); 2336 assert_eq!( 2337 vmm.vm_config 2338 .as_ref() 2339 .unwrap() 2340 .lock() 2341 .unwrap() 2342 .user_devices 2343 .clone() 2344 .unwrap() 2345 .len(), 2346 1 2347 ); 2348 assert_eq!( 2349 vmm.vm_config 2350 .as_ref() 2351 .unwrap() 2352 .lock() 2353 .unwrap() 2354 .user_devices 2355 .clone() 2356 .unwrap()[0], 2357 user_device_config 2358 ); 2359 } 2360 2361 #[test] 2362 fn test_vmm_vm_cold_add_disk() { 2363 let mut vmm = create_dummy_vmm(); 2364 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2365 2366 assert!(matches!( 2367 vmm.vm_add_disk(disk_config.clone()), 2368 Err(VmError::VmNotCreated) 2369 )); 2370 2371 let _ = vmm.vm_create(create_dummy_vm_config()); 2372 assert!(vmm 2373 .vm_config 2374 .as_ref() 2375 .unwrap() 2376 .lock() 2377 .unwrap() 2378 .disks 2379 .is_none()); 2380 2381 assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none()); 2382 assert_eq!( 2383 vmm.vm_config 2384 .as_ref() 2385 .unwrap() 2386 .lock() 2387 .unwrap() 2388 .disks 2389 .clone() 2390 .unwrap() 2391 .len(), 2392 1 2393 ); 2394 assert_eq!( 2395 vmm.vm_config 2396 .as_ref() 2397 .unwrap() 2398 .lock() 2399 .unwrap() 2400 .disks 2401 .clone() 2402 .unwrap()[0], 2403 disk_config 2404 ); 2405 } 2406 2407 #[test] 2408 fn test_vmm_vm_cold_add_fs() { 2409 let mut vmm = create_dummy_vmm(); 2410 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2411 2412 assert!(matches!( 2413 vmm.vm_add_fs(fs_config.clone()), 2414 Err(VmError::VmNotCreated) 2415 )); 2416 2417 let _ = vmm.vm_create(create_dummy_vm_config()); 2418 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2419 2420 assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none()); 2421 assert_eq!( 2422 vmm.vm_config 2423 .as_ref() 2424 .unwrap() 2425 .lock() 2426 .unwrap() 2427 .fs 2428 .clone() 2429 .unwrap() 2430 .len(), 2431 1 2432 ); 2433 assert_eq!( 2434 vmm.vm_config 2435 .as_ref() 2436 .unwrap() 2437 .lock() 2438 .unwrap() 2439 .fs 2440 .clone() 2441 .unwrap()[0], 2442 fs_config 2443 ); 2444 } 2445 2446 #[test] 2447 fn test_vmm_vm_cold_add_pmem() { 2448 let mut vmm = create_dummy_vmm(); 2449 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2450 2451 assert!(matches!( 2452 vmm.vm_add_pmem(pmem_config.clone()), 2453 Err(VmError::VmNotCreated) 2454 )); 2455 2456 let _ = vmm.vm_create(create_dummy_vm_config()); 2457 assert!(vmm 2458 .vm_config 2459 .as_ref() 2460 .unwrap() 2461 .lock() 2462 .unwrap() 2463 .pmem 2464 .is_none()); 2465 2466 assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none()); 2467 assert_eq!( 2468 vmm.vm_config 2469 .as_ref() 2470 .unwrap() 2471 .lock() 2472 .unwrap() 2473 .pmem 2474 .clone() 2475 .unwrap() 2476 .len(), 2477 1 2478 ); 2479 assert_eq!( 2480 vmm.vm_config 2481 .as_ref() 2482 .unwrap() 2483 .lock() 2484 .unwrap() 2485 .pmem 2486 .clone() 2487 .unwrap()[0], 2488 pmem_config 2489 ); 2490 } 2491 2492 #[test] 2493 fn test_vmm_vm_cold_add_net() { 2494 let mut vmm = create_dummy_vmm(); 2495 let net_config = NetConfig::parse( 2496 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2497 ) 2498 .unwrap(); 2499 2500 assert!(matches!( 2501 vmm.vm_add_net(net_config.clone()), 2502 Err(VmError::VmNotCreated) 2503 )); 2504 2505 let _ = vmm.vm_create(create_dummy_vm_config()); 2506 assert!(vmm 2507 .vm_config 2508 .as_ref() 2509 .unwrap() 2510 .lock() 2511 .unwrap() 2512 .net 2513 .is_none()); 2514 2515 assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none()); 2516 assert_eq!( 2517 vmm.vm_config 2518 .as_ref() 2519 .unwrap() 2520 .lock() 2521 .unwrap() 2522 .net 2523 .clone() 2524 .unwrap() 2525 .len(), 2526 1 2527 ); 2528 assert_eq!( 2529 vmm.vm_config 2530 .as_ref() 2531 .unwrap() 2532 .lock() 2533 .unwrap() 2534 .net 2535 .clone() 2536 .unwrap()[0], 2537 net_config 2538 ); 2539 } 2540 2541 #[test] 2542 fn test_vmm_vm_cold_add_vdpa() { 2543 let mut vmm = create_dummy_vmm(); 2544 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2545 2546 assert!(matches!( 2547 vmm.vm_add_vdpa(vdpa_config.clone()), 2548 Err(VmError::VmNotCreated) 2549 )); 2550 2551 let _ = vmm.vm_create(create_dummy_vm_config()); 2552 assert!(vmm 2553 .vm_config 2554 .as_ref() 2555 .unwrap() 2556 .lock() 2557 .unwrap() 2558 .vdpa 2559 .is_none()); 2560 2561 assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none()); 2562 assert_eq!( 2563 vmm.vm_config 2564 .as_ref() 2565 .unwrap() 2566 .lock() 2567 .unwrap() 2568 .vdpa 2569 .clone() 2570 .unwrap() 2571 .len(), 2572 1 2573 ); 2574 assert_eq!( 2575 vmm.vm_config 2576 .as_ref() 2577 .unwrap() 2578 .lock() 2579 .unwrap() 2580 .vdpa 2581 .clone() 2582 .unwrap()[0], 2583 vdpa_config 2584 ); 2585 } 2586 2587 #[test] 2588 fn test_vmm_vm_cold_add_vsock() { 2589 let mut vmm = create_dummy_vmm(); 2590 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2591 2592 assert!(matches!( 2593 vmm.vm_add_vsock(vsock_config.clone()), 2594 Err(VmError::VmNotCreated) 2595 )); 2596 2597 let _ = vmm.vm_create(create_dummy_vm_config()); 2598 assert!(vmm 2599 .vm_config 2600 .as_ref() 2601 .unwrap() 2602 .lock() 2603 .unwrap() 2604 .vsock 2605 .is_none()); 2606 2607 assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none()); 2608 assert_eq!( 2609 vmm.vm_config 2610 .as_ref() 2611 .unwrap() 2612 .lock() 2613 .unwrap() 2614 .vsock 2615 .clone() 2616 .unwrap(), 2617 vsock_config 2618 ); 2619 } 2620 } 2621