// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

#[macro_use]
extern crate event_monitor;
#[macro_use]
extern crate log;

use std::collections::HashMap;
use std::fs::File;
use std::io::{stdout, Read, Write};
use std::net::{TcpListener, TcpStream};
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::os::unix::net::{UnixListener, UnixStream};
use std::panic::AssertUnwindSafe;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::mpsc::{Receiver, RecvError, SendError, Sender};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::{io, result, thread};

use anyhow::anyhow;
#[cfg(feature = "dbus_api")]
use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
use api::http::HttpApiHandle;
use console_devices::{pre_create_console_devices, ConsoleInfo};
use landlock::LandlockError;
use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
use memory_manager::MemoryManagerSnapshotData;
use pci::PciBdf;
use seccompiler::{apply_filter, SeccompAction};
use serde::ser::{SerializeStruct, Serializer};
use serde::{Deserialize, Serialize};
use signal_hook::iterator::{Handle, Signals};
use thiserror::Error;
use tracer::trace_scoped;
use vm_memory::bitmap::{AtomicBitmap, BitmapSlice};
use vm_memory::{ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile};
use vm_migration::protocol::*;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::signal::unblock_signal;
use vmm_sys_util::sock_ctrl_msg::ScmSocket;

use crate::api::{
    ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
    VmSendMigrationData, VmmPingResponse,
};
use crate::config::{add_to_config, RestoreConfig};
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use crate::coredump::GuestDebuggable;
use crate::landlock::Landlock;
use crate::memory_manager::MemoryManager;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
use crate::migration::get_vm_snapshot;
use crate::migration::{recv_vm_config, recv_vm_state};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::vm::{Error as VmError, Vm, VmState};
use crate::vm_config::{
    DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
    VmConfig, VsockConfig,
};

mod acpi;
pub mod api;
mod clone3;
pub mod config;
pub mod console_devices;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
mod coredump;
pub mod cpu;
pub mod device_manager;
pub mod device_tree;
#[cfg(feature = "guest_debug")]
mod gdb;
#[cfg(feature = "igvm")]
mod igvm;
pub mod interrupt;
pub mod landlock;
pub mod memory_manager;
pub mod migration;
mod pci_segment;
pub mod seccomp_filters;
mod serial_manager;
mod sigwinch_listener;
pub mod vm;
pub mod vm_config;

type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>;
type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>;

/// Errors associated with VMM management
#[derive(Debug, Error)]
pub enum Error {
    /// API request receive error
    #[error("Error receiving API request: {0}")]
    ApiRequestRecv(#[source] RecvError),

    /// API response send error
    #[error("Error sending API response: {0}")]
    ApiResponseSend(#[source] SendError<ApiResponse>),

    /// Cannot bind to the UNIX domain socket path
    #[error("Error binding to UNIX domain socket: {0}")]
    Bind(#[source] io::Error),

    /// Cannot clone EventFd.
    #[error("Error cloning EventFd: {0}")]
    EventFdClone(#[source] io::Error),

    /// Cannot create EventFd.
    #[error("Error creating EventFd: {0}")]
    EventFdCreate(#[source] io::Error),

    /// Cannot read from EventFd.
    #[error("Error reading from EventFd: {0}")]
    EventFdRead(#[source] io::Error),

    /// Cannot create epoll context.
    #[error("Error creating epoll context: {0}")]
    Epoll(#[source] io::Error),

    /// Cannot create HTTP thread
    #[error("Error spawning HTTP thread: {0}")]
    HttpThreadSpawn(#[source] io::Error),

    /// Cannot create D-Bus thread
    #[cfg(feature = "dbus_api")]
    #[error("Error spawning D-Bus thread: {0}")]
    DBusThreadSpawn(#[source] io::Error),

    /// Cannot start D-Bus session
    #[cfg(feature = "dbus_api")]
    #[error("Error starting D-Bus session: {0}")]
    CreateDBusSession(#[source] zbus::Error),

    /// Cannot create `event-monitor` thread
    #[error("Error spawning `event-monitor` thread: {0}")]
    EventMonitorThreadSpawn(#[source] io::Error),

    /// Cannot handle the VM STDIN stream
    #[error("Error handling VM stdin: {0:?}")]
    Stdin(VmError),

    /// Cannot handle the VM pty stream
    #[error("Error handling VM pty: {0:?}")]
    Pty(VmError),

    /// Cannot reboot the VM
    #[error("Error rebooting VM: {0:?}")]
    VmReboot(VmError),

    /// Cannot create VMM thread
    #[error("Error spawning VMM thread: {0:?}")]
    VmmThreadSpawn(#[source] io::Error),

    /// Cannot shut the VMM down
    #[error("Error shutting down VMM: {0:?}")]
    VmmShutdown(VmError),

    /// Cannot create seccomp filter
    #[error("Error creating seccomp filter: {0}")]
    CreateSeccompFilter(seccompiler::Error),

    /// Cannot apply seccomp filter
    #[error("Error applying seccomp filter: {0}")]
    ApplySeccompFilter(seccompiler::Error),

    /// Error activating virtio devices
    #[error("Error activating virtio devices: {0:?}")]
    ActivateVirtioDevices(VmError),

    /// Error creating API server
    #[error("Error creating API server: {0:?}")]
    CreateApiServer(micro_http::ServerError),

    /// Error binding API server socket
    #[error("Error creating API server socket: {0:?}")]
    CreateApiServerSocket(#[source] io::Error),

    #[cfg(feature = "guest_debug")]
    #[error("Failed to start the GDB thread: {0}")]
    GdbThreadSpawn(io::Error),

    /// GDB request receive error
    #[cfg(feature = "guest_debug")]
    #[error("Error receiving GDB request: {0}")]
    GdbRequestRecv(#[source] RecvError),

    /// GDB response send error
    #[cfg(feature = "guest_debug")]
    #[error("Error sending GDB response: {0}")]
    GdbResponseSend(#[source] SendError<gdb::GdbResponse>),

    #[error("Cannot spawn a signal handler thread: {0}")]
    SignalHandlerSpawn(#[source] io::Error),

    #[error("Failed to join on threads: {0:?}")]
    ThreadCleanup(Box<dyn std::any::Any + Send>),

    /// Cannot create Landlock object
    #[error("Error creating landlock object: {0}")]
    CreateLandlock(LandlockError),

    /// Cannot apply landlock based sandboxing
    #[error("Error applying landlock: {0}")]
    ApplyLandlock(LandlockError),
}

pub type Result<T> = result::Result<T, Error>;
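
/// Tokens dispatched by the VMM epoll loop. Each discriminant doubles as the
/// `u64` event data registered with epoll, so the wait side can recover the
/// token via `EpollDispatch::from(event.data)`. A minimal round-trip sketch:
///
/// ```ignore
/// assert_eq!(EpollDispatch::from(EpollDispatch::Api as u64), EpollDispatch::Api);
/// assert_eq!(EpollDispatch::from(42), EpollDispatch::Unknown); // out-of-range data
/// ```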
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u64)]
pub enum EpollDispatch {
    Exit = 0,
    Reset = 1,
    Api = 2,
    ActivateVirtioDevices = 3,
    Debug = 4,
    Unknown,
}

impl From<u64> for EpollDispatch {
    fn from(v: u64) -> Self {
        use EpollDispatch::*;
        match v {
            0 => Exit,
            1 => Reset,
            2 => Api,
            3 => ActivateVirtioDevices,
            4 => Debug,
            _ => Unknown,
        }
    }
}

enum SocketStream {
    Unix(UnixStream),
    Tcp(TcpStream),
}

impl Read for SocketStream {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match self {
            SocketStream::Unix(stream) => stream.read(buf),
            SocketStream::Tcp(stream) => stream.read(buf),
        }
    }
}

impl Write for SocketStream {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        match self {
            SocketStream::Unix(stream) => stream.write(buf),
            SocketStream::Tcp(stream) => stream.write(buf),
        }
    }

    fn flush(&mut self) -> std::io::Result<()> {
        match self {
            SocketStream::Unix(stream) => stream.flush(),
            SocketStream::Tcp(stream) => stream.flush(),
        }
    }
}

impl AsRawFd for SocketStream {
    fn as_raw_fd(&self) -> RawFd {
        match self {
            SocketStream::Unix(s) => s.as_raw_fd(),
            SocketStream::Tcp(s) => s.as_raw_fd(),
        }
    }
}

impl ReadVolatile for SocketStream {
    fn read_volatile<B: BitmapSlice>(
        &mut self,
        buf: &mut VolatileSlice<B>,
    ) -> std::result::Result<usize, VolatileMemoryError> {
        match self {
            SocketStream::Unix(s) => s.read_volatile(buf),
            SocketStream::Tcp(s) => s.read_volatile(buf),
        }
    }

    fn read_exact_volatile<B: BitmapSlice>(
        &mut self,
        buf: &mut VolatileSlice<B>,
    ) -> std::result::Result<(), VolatileMemoryError> {
        match self {
            SocketStream::Unix(s) => s.read_exact_volatile(buf),
            SocketStream::Tcp(s) => s.read_exact_volatile(buf),
        }
    }
}

impl WriteVolatile for SocketStream {
    fn write_volatile<B: BitmapSlice>(
        &mut self,
        buf: &VolatileSlice<B>,
    ) -> std::result::Result<usize, VolatileMemoryError> {
        match self {
            SocketStream::Unix(s) => s.write_volatile(buf),
            SocketStream::Tcp(s) => s.write_volatile(buf),
        }
    }

    fn write_all_volatile<B: BitmapSlice>(
        &mut self,
        buf: &VolatileSlice<B>,
    ) -> std::result::Result<(), VolatileMemoryError> {
        match self {
            SocketStream::Unix(s) => s.write_all_volatile(buf),
            SocketStream::Tcp(s) => s.write_all_volatile(buf),
        }
    }
}
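
/// Thin wrapper owning the epoll fd used by the VMM control loop. A minimal
/// usage sketch (error handling elided):
///
/// ```ignore
/// let mut epoll = EpollContext::new()?;
/// let exit_evt = EventFd::new(EFD_NONBLOCK)?;
/// epoll.add_event(&exit_evt, EpollDispatch::Exit)?;
/// // epoll::wait() on epoll.as_raw_fd() then yields events whose `data`
/// // field converts back into an `EpollDispatch` token.
/// ```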
pub struct EpollContext {
    epoll_file: File,
}

impl EpollContext {
    pub fn new() -> result::Result<EpollContext, io::Error> {
        let epoll_fd = epoll::create(true)?;
        // Use 'File' to enforce closing on 'epoll_fd'
        // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us.
        let epoll_file = unsafe { File::from_raw_fd(epoll_fd) };

        Ok(EpollContext { epoll_file })
    }

    pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error>
    where
        T: AsRawFd,
    {
        let dispatch_index = token as u64;
        epoll::ctl(
            self.epoll_file.as_raw_fd(),
            epoll::ControlOptions::EPOLL_CTL_ADD,
            fd.as_raw_fd(),
            epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
        )?;

        Ok(())
    }

    #[cfg(fuzzing)]
    pub fn add_event_custom<T>(
        &mut self,
        fd: &T,
        id: u64,
        evts: epoll::Events,
    ) -> result::Result<(), io::Error>
    where
        T: AsRawFd,
    {
        epoll::ctl(
            self.epoll_file.as_raw_fd(),
            epoll::ControlOptions::EPOLL_CTL_ADD,
            fd.as_raw_fd(),
            epoll::Event::new(evts, id),
        )?;

        Ok(())
    }
}

impl AsRawFd for EpollContext {
    fn as_raw_fd(&self) -> RawFd {
        self.epoll_file.as_raw_fd()
    }
}

pub struct PciDeviceInfo {
    pub id: String,
    pub bdf: PciBdf,
}

impl Serialize for PciDeviceInfo {
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bdf_str = self.bdf.to_string();

        // Serialize the structure.
        let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?;
        state.serialize_field("id", &self.id)?;
        state.serialize_field("bdf", &bdf_str)?;
        state.end()
    }
}
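
// Illustrative JSON produced by the Serialize impl above (the values are
// hypothetical; the "bdf" string comes from PciBdf's Display impl):
//
//     {"id":"_disk2","bdf":"0000:00:06.0"}
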
pub fn feature_list() -> Vec<String> {
    vec![
        #[cfg(feature = "dbus_api")]
        "dbus_api".to_string(),
        #[cfg(feature = "dhat-heap")]
        "dhat-heap".to_string(),
        #[cfg(feature = "guest_debug")]
        "guest_debug".to_string(),
        #[cfg(feature = "igvm")]
        "igvm".to_string(),
        #[cfg(feature = "io_uring")]
        "io_uring".to_string(),
        #[cfg(feature = "kvm")]
        "kvm".to_string(),
        #[cfg(feature = "mshv")]
        "mshv".to_string(),
        #[cfg(feature = "sev_snp")]
        "sev_snp".to_string(),
        #[cfg(feature = "tdx")]
        "tdx".to_string(),
        #[cfg(feature = "tracing")]
        "tracing".to_string(),
    ]
}

pub fn start_event_monitor_thread(
    mut monitor: event_monitor::Monitor,
    seccomp_action: &SeccompAction,
    landlock_enable: bool,
    hypervisor_type: hypervisor::HypervisorType,
    exit_event: EventFd,
) -> Result<thread::JoinHandle<Result<()>>> {
    // Retrieve seccomp filter
    let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    thread::Builder::new()
        .name("event-monitor".to_owned())
        .spawn(move || {
            // Apply seccomp filter
            if !seccomp_filter.is_empty() {
                apply_filter(&seccomp_filter)
                    .map_err(Error::ApplySeccompFilter)
                    .map_err(|e| {
                        error!("Error applying seccomp filter: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }
            if landlock_enable {
                Landlock::new()
                    .map_err(Error::CreateLandlock)?
                    .restrict_self()
                    .map_err(Error::ApplyLandlock)
                    .map_err(|e| {
                        error!("Error applying landlock to event monitor thread: {:?}", e);
                        exit_event.write(1).ok();
                        e
                    })?;
            }

            std::panic::catch_unwind(AssertUnwindSafe(move || {
                while let Ok(event) = monitor.rx.recv() {
                    let event = Arc::new(event);

                    if let Some(ref mut file) = monitor.file {
                        file.write_all(event.as_bytes().as_ref()).ok();
                        file.write_all(b"\n\n").ok();
                    }

                    for tx in monitor.broadcast.iter() {
                        tx.send(event.clone()).ok();
                    }
                }
            }))
            .map_err(|_| {
                error!("`event-monitor` thread panicked");
                exit_event.write(1).ok();
            })
            .ok();

            Ok(())
        })
        .map_err(Error::EventMonitorThreadSpawn)
}
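
// Minimal call sketch for the helper above (assumes the caller has already
// built an `event_monitor::Monitor` and owns `hypervisor` and `exit_event`):
//
//     let handle = start_event_monitor_thread(
//         monitor,
//         &SeccompAction::Trap,
//         false, // landlock_enable
//         hypervisor.hypervisor_type(),
//         exit_event.try_clone().map_err(Error::EventFdClone)?,
//     )?;
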
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]
pub fn start_vmm_thread(
    vmm_version: VmmVersionInfo,
    http_path: &Option<String>,
    http_fd: Option<RawFd>,
    #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>,
    api_event: EventFd,
    api_sender: Sender<ApiRequest>,
    api_receiver: Receiver<ApiRequest>,
    #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>,
    #[cfg(feature = "guest_debug")] debug_event: EventFd,
    #[cfg(feature = "guest_debug")] vm_debug_event: EventFd,
    exit_event: EventFd,
    seccomp_action: &SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    landlock_enable: bool,
) -> Result<VmmThreadHandle> {
    #[cfg(feature = "guest_debug")]
    let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps();
    #[cfg(feature = "guest_debug")]
    let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel();
    #[cfg(feature = "guest_debug")]
    let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?;
    #[cfg(feature = "guest_debug")]
    let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?;

    let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?;
    let hypervisor_type = hypervisor.hypervisor_type();

    // Retrieve seccomp filter
    let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type)
        .map_err(Error::CreateSeccompFilter)?;

    let vmm_seccomp_action = seccomp_action.clone();
    let thread = {
        let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?;
        thread::Builder::new()
            .name("vmm".to_string())
            .spawn(move || {
                // Apply seccomp filter for VMM thread.
                if !vmm_seccomp_filter.is_empty() {
                    apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?;
                }

                let mut vmm = Vmm::new(
                    vmm_version,
                    api_event,
                    #[cfg(feature = "guest_debug")]
                    debug_event,
                    #[cfg(feature = "guest_debug")]
                    vm_debug_event,
                    vmm_seccomp_action,
                    hypervisor,
                    exit_event,
                )?;

                vmm.setup_signal_handler(landlock_enable)?;

                vmm.control_loop(
                    Rc::new(api_receiver),
                    #[cfg(feature = "guest_debug")]
                    Rc::new(gdb_receiver),
                )
            })
            .map_err(Error::VmmThreadSpawn)?
    };

    // The VMM thread is started, we can start the dbus thread
    // and start serving HTTP requests
    #[cfg(feature = "dbus_api")]
    let dbus_shutdown_chs = match dbus_options {
        Some(opts) => {
            let (_, chs) = api::start_dbus_thread(
                opts,
                api_event_clone.try_clone().map_err(Error::EventFdClone)?,
                api_sender.clone(),
                seccomp_action,
                exit_event.try_clone().map_err(Error::EventFdClone)?,
                hypervisor_type,
            )?;
            Some(chs)
        }
        None => None,
    };

    let http_api_handle = if let Some(http_path) = http_path {
        Some(api::start_http_path_thread(
            http_path,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else if let Some(http_fd) = http_fd {
        Some(api::start_http_fd_thread(
            http_fd,
            api_event_clone,
            api_sender,
            seccomp_action,
            exit_event,
            hypervisor_type,
            landlock_enable,
        )?)
    } else {
        None
    };

    #[cfg(feature = "guest_debug")]
    if let Some(debug_path) = debug_path {
        let target = gdb::GdbStub::new(
            gdb_sender,
            gdb_debug_event,
            gdb_vm_debug_event,
            gdb_hw_breakpoints,
        );
        thread::Builder::new()
            .name("gdb".to_owned())
            .spawn(move || gdb::gdb_thread(target, &debug_path))
            .map_err(Error::GdbThreadSpawn)?;
    }

    Ok(VmmThreadHandle {
        thread_handle: thread,
        #[cfg(feature = "dbus_api")]
        dbus_shutdown_chs,
        http_api_handle,
    })
}

#[derive(Clone, Deserialize, Serialize)]
struct VmMigrationConfig {
    vm_config: Arc<Mutex<VmConfig>>,
    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
    memory_manager_data: MemoryManagerSnapshotData,
}

#[derive(Debug, Clone)]
pub struct VmmVersionInfo {
    pub build_version: String,
    pub version: String,
}

impl VmmVersionInfo {
    pub fn new(build_version: &str, version: &str) -> Self {
        Self {
            build_version: build_version.to_owned(),
            version: version.to_owned(),
        }
    }
}

pub struct VmmThreadHandle {
    pub thread_handle: thread::JoinHandle<Result<()>>,
    #[cfg(feature = "dbus_api")]
    pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>,
    pub http_api_handle: Option<HttpApiHandle>,
}

pub struct Vmm {
    epoll: EpollContext,
    exit_evt: EventFd,
    reset_evt: EventFd,
    api_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    debug_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    version: VmmVersionInfo,
    vm: Option<Vm>,
    vm_config: Option<Arc<Mutex<VmConfig>>>,
    seccomp_action: SeccompAction,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    activate_evt: EventFd,
    signals: Option<Handle>,
    threads: Vec<thread::JoinHandle<()>>,
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    console_resize_pipe: Option<Arc<File>>,
    console_info: Option<ConsoleInfo>,
}

impl Vmm {
    pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT];

    fn signal_handler(
        mut signals: Signals,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
        exit_evt: &EventFd,
    ) {
        for sig in &Self::HANDLED_SIGNALS {
            unblock_signal(*sig).unwrap();
        }

        for signal in signals.forever() {
            match signal {
                SIGTERM | SIGINT => {
                    if exit_evt.write(1).is_err() {
                        // Resetting the terminal is usually done as the VMM exits
                        if let Ok(lock) = original_termios_opt.lock() {
                            if let Some(termios) = *lock {
                                // SAFETY: FFI call
                                let _ = unsafe {
                                    tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios)
                                };
                            }
                        } else {
                            warn!("Failed to lock original termios");
                        }

                        std::process::exit(1);
                    }
                }
                _ => (),
            }
        }
    }

    fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> {
        let signals = Signals::new(Self::HANDLED_SIGNALS);
        match signals {
            Ok(signals) => {
                self.signals = Some(signals.handle());
                let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
                let original_termios_opt = Arc::clone(&self.original_termios_opt);

                let signal_handler_seccomp_filter = get_seccomp_filter(
                    &self.seccomp_action,
                    Thread::SignalHandler,
                    self.hypervisor.hypervisor_type(),
                )
                .map_err(Error::CreateSeccompFilter)?;
                self.threads.push(
                    thread::Builder::new()
                        .name("vmm_signal_handler".to_string())
                        .spawn(move || {
                            if !signal_handler_seccomp_filter.is_empty() {
                                if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
                                    .map_err(Error::ApplySeccompFilter)
                                {
                                    error!("Error applying seccomp filter: {:?}", e);
                                    exit_evt.write(1).ok();
                                    return;
                                }
                            }
                            if landlock_enable {
                                match Landlock::new() {
                                    Ok(landlock) => {
                                        let _ = landlock
                                            .restrict_self()
                                            .map_err(Error::ApplyLandlock)
                                            .map_err(|e| {
                                                error!(
                                                    "Error applying Landlock to signal handler thread: {:?}",
                                                    e
                                                );
                                                exit_evt.write(1).ok();
                                            });
                                    }
                                    Err(e) => {
                                        error!("Error creating Landlock object: {:?}", e);
                                        exit_evt.write(1).ok();
                                    }
                                };
                            }

                            std::panic::catch_unwind(AssertUnwindSafe(|| {
                                Vmm::signal_handler(signals, original_termios_opt, &exit_evt);
                            }))
                            .map_err(|_| {
                                error!("vmm signal_handler thread panicked");
                                exit_evt.write(1).ok()
                            })
                            .ok();
                        })
                        .map_err(Error::SignalHandlerSpawn)?,
                );
            }
            Err(e) => error!("Error registering signal handlers: {}", e),
        }
        Ok(())
    }

    #[allow(clippy::too_many_arguments)]
    fn new(
        vmm_version: VmmVersionInfo,
        api_evt: EventFd,
        #[cfg(feature = "guest_debug")] debug_evt: EventFd,
        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
        seccomp_action: SeccompAction,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        exit_evt: EventFd,
    ) -> Result<Self> {
        let mut epoll = EpollContext::new().map_err(Error::Epoll)?;
        let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
        let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;

        epoll
            .add_event(&exit_evt, EpollDispatch::Exit)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&reset_evt, EpollDispatch::Reset)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices)
            .map_err(Error::Epoll)?;

        epoll
            .add_event(&api_evt, EpollDispatch::Api)
            .map_err(Error::Epoll)?;

        #[cfg(feature = "guest_debug")]
        epoll
            .add_event(&debug_evt, EpollDispatch::Debug)
            .map_err(Error::Epoll)?;

        Ok(Vmm {
            epoll,
            exit_evt,
            reset_evt,
            api_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            #[cfg(feature = "guest_debug")]
            vm_debug_evt,
            version: vmm_version,
            vm: None,
            vm_config: None,
            seccomp_action,
            hypervisor,
            activate_evt,
            signals: None,
            threads: vec![],
            original_termios_opt: Arc::new(Mutex::new(None)),
            console_resize_pipe: None,
            console_info: None,
        })
    }

    fn vm_receive_config<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        existing_memory_files: Option<HashMap<u32, File>>,
    ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError>
    where
        T: Read + Write,
    {
        // Read in config data along with memory manager data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;

        let vm_migration_config: VmMigrationConfig =
            serde_json::from_slice(&data).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e))
            })?;

        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(
            &vm_migration_config.vm_config,
            &vm_migration_config.common_cpuid,
        )?;

        let config = vm_migration_config.vm_config.clone();
        self.vm_config = Some(vm_migration_config.vm_config);
        self.console_info = Some(pre_create_console_devices(self).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e))
        })?);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e))
            })?;
        }

        let vm = Vm::create_hypervisor_vm(
            &self.hypervisor,
            #[cfg(feature = "tdx")]
            false,
            #[cfg(feature = "sev_snp")]
            false,
            #[cfg(feature = "sev_snp")]
            config.lock().unwrap().memory.total_size(),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating hypervisor VM from snapshot: {:?}",
                e
            ))
        })?;

        let phys_bits =
            vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);

        let memory_manager = MemoryManager::new(
            vm,
            &config.lock().unwrap().memory.clone(),
            None,
            phys_bits,
            #[cfg(feature = "tdx")]
            false,
            Some(&vm_migration_config.memory_manager_data),
            existing_memory_files,
            #[cfg(target_arch = "x86_64")]
            None,
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error creating MemoryManager from snapshot: {:?}",
                e
            ))
        })?;

        Response::ok().write_to(socket)?;

        Ok(memory_manager)
    }

    fn vm_receive_state<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        mm: Arc<Mutex<MemoryManager>>,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + Write,
    {
        // Read in state data
        let mut data: Vec<u8> = Vec::new();
        data.resize_with(req.length() as usize, Default::default);
        socket
            .read_exact(&mut data)
            .map_err(MigratableError::MigrateSocket)?;
        let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e))
        })?;

        let exit_evt = self.exit_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e))
        })?;
        let reset_evt = self.reset_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e))
        })?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e))
        })?;
        let activate_evt = self.activate_evt.try_clone().map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e))
        })?;

        let timestamp = Instant::now();
        let hypervisor_vm = mm.lock().unwrap().vm.clone();
        let mut vm = Vm::new_from_memory_manager(
            self.vm_config.clone().unwrap(),
            mm,
            hypervisor_vm,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            timestamp,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
        )
        .map_err(|e| {
            MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e))
        })?;

        // Restore the VM
        vm.restore().map_err(|e| {
            Response::error().write_to(socket).ok();
            MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e))
        })?;
        self.vm = Some(vm);

        Response::ok().write_to(socket)?;

        Ok(())
    }

    fn vm_receive_memory<T>(
        &mut self,
        req: &Request,
        socket: &mut T,
        memory_manager: &mut MemoryManager,
    ) -> std::result::Result<(), MigratableError>
    where
        T: Read + ReadVolatile + Write,
    {
        // Read table
        let table = MemoryRangeTable::read_from(socket, req.length())?;

        // And then read the memory itself
        memory_manager
            .receive_memory_regions(&table, socket)
            .inspect_err(|_| {
                Response::error().write_to(socket).ok();
            })?;
        Response::ok().write_to(socket)?;
        Ok(())
    }

    fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> {
        url.strip_prefix("unix:")
            .ok_or_else(|| {
                MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url))
            })
            .map(|s| s.into())
    }

    fn send_migration_socket(
        destination_url: &str,
    ) -> std::result::Result<SocketStream, MigratableError> {
        if let Some(address) = destination_url.strip_prefix("tcp:") {
            info!("Connecting to TCP socket at {}", address);

            let socket = TcpStream::connect(address).map_err(|e| {
                MigratableError::MigrateSend(anyhow!("Error connecting to TCP socket: {}", e))
            })?;

            Ok(SocketStream::Tcp(socket))
        } else {
            let path = Vmm::socket_url_to_path(destination_url)?;
            info!("Connecting to UNIX socket at {:?}", path);

            let socket = UnixStream::connect(&path).map_err(|e| {
                MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e))
            })?;

            Ok(SocketStream::Unix(socket))
        }
    }
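
    // Migration URL forms accepted by the socket helpers above and below
    // (illustrative values):
    //
    //     "tcp:127.0.0.1:4444"        -> SocketStream::Tcp
    //     "unix:/tmp/ch-migrate.sock" -> SocketStream::Unix
    //
    // Anything without a "tcp:" prefix must carry the "unix:" prefix,
    // otherwise socket_url_to_path() rejects the URL.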
    fn receive_migration_socket(
        receiver_url: &str,
    ) -> std::result::Result<SocketStream, MigratableError> {
        if let Some(address) = receiver_url.strip_prefix("tcp:") {
            let listener = TcpListener::bind(address).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e))
            })?;

            let (socket, _addr) = listener.accept().map_err(|e| {
                MigratableError::MigrateReceive(anyhow!(
                    "Error accepting connection on TCP socket: {}",
                    e
                ))
            })?;

            Ok(SocketStream::Tcp(socket))
        } else {
            let path = Vmm::socket_url_to_path(receiver_url)?;
            let listener = UnixListener::bind(&path).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e))
            })?;

            let (socket, _addr) = listener.accept().map_err(|e| {
                MigratableError::MigrateReceive(anyhow!(
                    "Error accepting connection on UNIX socket: {}",
                    e
                ))
            })?;

            // Remove the UNIX socket file after accepting the connection
            std::fs::remove_file(&path).map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error removing UNIX socket file: {}", e))
            })?;

            Ok(SocketStream::Unix(socket))
        }
    }

    // Returns true if there were dirty pages to send
    fn vm_maybe_send_dirty_pages(
        vm: &mut Vm,
        socket: &mut SocketStream,
    ) -> result::Result<bool, MigratableError> {
        // Send (dirty) memory table
        let table = vm.dirty_log()?;

        // If there are no dirty regions, there is nothing to send
        if table.regions().is_empty() {
            return Ok(false);
        }

        Request::memory(table.length()).write_to(socket).unwrap();
        table.write_to(socket)?;
        // And then the memory itself
        vm.send_memory_regions(&table, socket)?;
        Response::read_from(socket)?.ok_or_abandon(
            socket,
            MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")),
        )?;

        Ok(true)
    }
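
    // Wire protocol sketch, as driven by send_migration() below with the
    // Request/Response types from vm_migration::protocol:
    //
    //   Start -> [memory fds over the UNIX socket when --local] -> Config
    //         -> Memory table + pages (plus dirty-page passes, non-local only)
    //         -> State (VM snapshot) -> Complete
    //
    // Every Request expects an ok/error Response on the same socket before
    // the next step proceeds.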
    fn send_migration(
        vm: &mut Vm,
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc<
            dyn hypervisor::Hypervisor,
        >,
        send_data_migration: VmSendMigrationData,
    ) -> result::Result<(), MigratableError> {
        // Set up the socket connection
        let mut socket = Self::send_migration_socket(&send_data_migration.destination_url)?;

        // Start the migration
        Request::start().write_to(&mut socket)?;
        Response::read_from(&mut socket)?.ok_or_abandon(
            &mut socket,
            MigratableError::MigrateSend(anyhow!("Error starting migration")),
        )?;

        // Send config
        let vm_config = vm.get_config();
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let common_cpuid = {
            #[cfg(feature = "tdx")]
            if vm_config.lock().unwrap().is_tdx_enabled() {
                return Err(MigratableError::MigrateSend(anyhow!(
                    "Live Migration is not supported when TDX is enabled"
                )));
            };

            let amx = vm_config.lock().unwrap().cpus.features.amx;
            let phys_bits =
                vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
            arch::generate_common_cpuid(
                &hypervisor,
                &arch::CpuidConfig {
                    sgx_epc_sections: None,
                    phys_bits,
                    kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv,
                    #[cfg(feature = "tdx")]
                    tdx: false,
                    amx,
                },
            )
            .map_err(|e| {
                MigratableError::MigrateSend(anyhow!("Error generating common cpuid: {:?}", e))
            })?
        };

        if send_data_migration.local {
            match &mut socket {
                SocketStream::Unix(unix_socket) => {
                    // Proceed with sending memory file descriptors over UNIX socket
                    vm.send_memory_fds(unix_socket)?;
                }
                SocketStream::Tcp(_tcp_socket) => {
                    return Err(MigratableError::MigrateSend(anyhow!(
                        "--local option is not supported with TCP sockets",
                    )));
                }
            }
        }

        let vm_migration_config = VmMigrationConfig {
            vm_config,
            #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
            common_cpuid,
            memory_manager_data: vm.memory_manager_data(),
        };
        let config_data = serde_json::to_vec(&vm_migration_config).unwrap();
        Request::config(config_data.len() as u64).write_to(&mut socket)?;
        socket
            .write_all(&config_data)
            .map_err(MigratableError::MigrateSocket)?;
        Response::read_from(&mut socket)?.ok_or_abandon(
            &mut socket,
            MigratableError::MigrateSend(anyhow!("Error during config migration")),
        )?;

        // Let every Migratable object know about the migration being started.
        vm.start_migration()?;

        if send_data_migration.local {
            // Now pause VM
            vm.pause()?;
        } else {
            // Start logging dirty pages
            vm.start_dirty_log()?;

            // Send memory table
            let table = vm.memory_range_table()?;
            Request::memory(table.length())
                .write_to(&mut socket)
                .unwrap();
            table.write_to(&mut socket)?;
            // And then the memory itself
            vm.send_memory_regions(&table, &mut socket)?;
            Response::read_from(&mut socket)?.ok_or_abandon(
                &mut socket,
                MigratableError::MigrateSend(anyhow!("Error during memory migration")),
            )?;

            // Try at most 5 passes of dirty memory sending
            const MAX_DIRTY_MIGRATIONS: usize = 5;
            for i in 0..MAX_DIRTY_MIGRATIONS {
                info!(
                    "Dirty memory migration {} of {}",
                    i + 1,
                    MAX_DIRTY_MIGRATIONS
                );
                if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? {
                    break;
                }
            }

            // Now pause VM
            vm.pause()?;

            // Send last batch of dirty pages
            Self::vm_maybe_send_dirty_pages(vm, &mut socket)?;

            // Stop logging dirty pages
            vm.stop_dirty_log()?;
        }
        // Capture snapshot and send it
        let vm_snapshot = vm.snapshot()?;
        let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();
        Request::state(snapshot_data.len() as u64).write_to(&mut socket)?;
        socket
            .write_all(&snapshot_data)
            .map_err(MigratableError::MigrateSocket)?;
        Response::read_from(&mut socket)?.ok_or_abandon(
            &mut socket,
            MigratableError::MigrateSend(anyhow!("Error during state migration")),
        )?;
        // Complete the migration
        Request::complete().write_to(&mut socket)?;
        Response::read_from(&mut socket)?.ok_or_abandon(
            &mut socket,
            MigratableError::MigrateSend(anyhow!("Error completing migration")),
        )?;

        info!("Migration complete");

        // Let every Migratable object know about the migration being complete
        vm.complete_migration()
    }

    #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
    fn vm_check_cpuid_compatibility(
        &self,
        src_vm_config: &Arc<Mutex<VmConfig>>,
        src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry],
    ) -> result::Result<(), MigratableError> {
        #[cfg(feature = "tdx")]
        if src_vm_config.lock().unwrap().is_tdx_enabled() {
            return Err(MigratableError::MigrateReceive(anyhow!(
                "Live Migration is not supported when TDX is enabled"
            )));
        };

        // We check `CPUID` compatibility between the source and destination VMs,
        // which is mostly about feature compatibility; "topology/sgx" leaves are
        // not relevant.
        let dest_cpuid = &{
            let vm_config = &src_vm_config.lock().unwrap();

            let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits);
            arch::generate_common_cpuid(
                &self.hypervisor.clone(),
                &arch::CpuidConfig {
                    sgx_epc_sections: None,
                    phys_bits,
                    kvm_hyperv: vm_config.cpus.kvm_hyperv,
                    #[cfg(feature = "tdx")]
                    tdx: false,
                    amx: vm_config.cpus.features.amx,
                },
            )
            .map_err(|e| {
                MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
            })?
        };
        arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| {
            MigratableError::MigrateReceive(anyhow!(
                "Error checking cpu feature compatibility: {:?}",
                e
            ))
        })
    }

    fn vm_restore(
        &mut self,
        source_url: &str,
        vm_config: Arc<Mutex<VmConfig>>,
        prefault: bool,
    ) -> std::result::Result<(), VmError> {
        let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?;
        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?;

        #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
        self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid)
            .map_err(VmError::Restore)?;

        self.vm_config = Some(Arc::clone(&vm_config));

        // Always re-populate the 'console_info' based on the new 'vm_config'
        self.console_info =
            Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        let vm = Vm::new(
            vm_config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            Some(snapshot),
            Some(source_url),
            Some(prefault),
        )?;
        self.vm = Some(vm);

        if self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .landlock_enable
        {
            apply_landlock(self.vm_config.as_ref().unwrap().clone())
                .map_err(VmError::ApplyLandlock)?;
        }

        // Now we can restore the rest of the VM.
        if let Some(ref mut vm) = self.vm {
            vm.restore()
        } else {
            Err(VmError::VmNotCreated)
        }
    }
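
    // The control loop below is the VMM thread's main loop: it multiplexes
    // the exit, reset, device-activation, API and (optionally) debug EventFds
    // over the single epoll fd set up in Vmm::new(), and only returns once an
    // exit event or an exiting API request is seen.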
    fn control_loop(
        &mut self,
        api_receiver: Rc<Receiver<ApiRequest>>,
        #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>,
    ) -> Result<()> {
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
        let epoll_fd = self.epoll.as_raw_fd();

        'outer: loop {
            let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == io::ErrorKind::Interrupted {
                        // It's well defined from the epoll_wait() syscall
                        // documentation that the epoll loop can be interrupted
                        // before any of the requested events occurred or the
                        // timeout expired. In both those cases, epoll_wait()
                        // returns an error of type EINTR, but this should not
                        // be considered as a regular error. Instead it is more
                        // appropriate to retry, by calling into epoll_wait().
                        continue;
                    }
                    return Err(Error::Epoll(e));
                }
            };

            for event in events.iter().take(num_events) {
                let dispatch_event: EpollDispatch = event.data.into();
                match dispatch_event {
                    EpollDispatch::Unknown => {
                        let event = event.data;
                        warn!("Unknown VMM loop event: {}", event);
                    }
                    EpollDispatch::Exit => {
                        info!("VM exit event");
                        // Consume the event.
                        self.exit_evt.read().map_err(Error::EventFdRead)?;
                        self.vmm_shutdown().map_err(Error::VmmShutdown)?;

                        break 'outer;
                    }
                    EpollDispatch::Reset => {
                        info!("VM reset event");
                        // Consume the event.
                        self.reset_evt.read().map_err(Error::EventFdRead)?;
                        self.vm_reboot().map_err(Error::VmReboot)?;
                    }
                    EpollDispatch::ActivateVirtioDevices => {
                        if let Some(ref vm) = self.vm {
                            let count = self.activate_evt.read().map_err(Error::EventFdRead)?;
                            info!(
                                "Trying to activate pending virtio devices: count = {}",
                                count
                            );
                            vm.activate_virtio_devices()
                                .map_err(Error::ActivateVirtioDevices)?;
                        }
                    }
                    EpollDispatch::Api => {
                        // Consume the events.
                        for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the API receiver channel
                            let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?;

                            if api_request(self)? {
                                break 'outer;
                            }
                        }
                    }
                    #[cfg(feature = "guest_debug")]
                    EpollDispatch::Debug => {
                        // Consume the events.
                        for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? {
                            // Read from the GDB receiver channel
                            let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?;

                            let response = if let Some(ref mut vm) = self.vm {
                                vm.debug_request(&gdb_request.payload, gdb_request.cpu_id)
                            } else {
                                Err(VmError::VmNotRunning)
                            }
                            .map_err(gdb::Error::Vm);

                            gdb_request
                                .sender
                                .send(response)
                                .map_err(Error::GdbResponseSend)?;
                        }
                    }
                    #[cfg(not(feature = "guest_debug"))]
                    EpollDispatch::Debug => {}
                }
            }
        }

        // Trigger the termination of the signal_handler thread
        if let Some(signals) = self.signals.take() {
            signals.close();
        }

        // Wait for all the threads to finish
        for thread in self.threads.drain(..) {
            thread.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }
}

fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> {
    vm_config.lock().unwrap().apply_landlock()?;
    Ok(())
}
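
// `RequestHandler` is the surface the API transports (HTTP, and D-Bus when
// enabled) drive: each `ApiRequest` drained in `control_loop` is invoked with
// the `Vmm` and dispatches to one of the methods below on the VMM thread.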
impl RequestHandler for Vmm {
    fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> {
        // We only store the passed VM config.
        // The VM will be created when being asked to boot it.
        if self.vm_config.is_none() {
            self.vm_config = Some(Arc::new(Mutex::new(*config)));
            self.console_info =
                Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);

            if self
                .vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .landlock_enable
            {
                apply_landlock(self.vm_config.as_ref().unwrap().clone())
                    .map_err(VmError::ApplyLandlock)?;
            }
            Ok(())
        } else {
            Err(VmError::VmAlreadyCreated)
        }
    }

    fn vm_boot(&mut self) -> result::Result<(), VmError> {
        tracer::start();
        info!("Booting VM");
        event!("vm", "booting");
        let r = {
            trace_scoped!("vm_boot");
            // If we don't have a config, we cannot boot a VM.
            if self.vm_config.is_none() {
                return Err(VmError::VmMissingConfig);
            };

            // console_info is set to None in vm_shutdown. Re-populate here if empty.
            if self.console_info.is_none() {
                self.console_info =
                    Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);
            }

            // Create a new VM if we don't have one yet.
            if self.vm.is_none() {
                let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
                let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
                #[cfg(feature = "guest_debug")]
                let vm_debug_evt = self
                    .vm_debug_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;
                let activate_evt = self
                    .activate_evt
                    .try_clone()
                    .map_err(VmError::EventFdClone)?;

                if let Some(ref vm_config) = self.vm_config {
                    let vm = Vm::new(
                        Arc::clone(vm_config),
                        exit_evt,
                        reset_evt,
                        #[cfg(feature = "guest_debug")]
                        vm_debug_evt,
                        &self.seccomp_action,
                        self.hypervisor.clone(),
                        activate_evt,
                        self.console_info.clone(),
                        self.console_resize_pipe.clone(),
                        Arc::clone(&self.original_termios_opt),
                        None,
                        None,
                        None,
                    )?;

                    self.vm = Some(vm);
                }
            }

            // Now we can boot the VM.
            if let Some(ref mut vm) = self.vm {
                vm.boot()
            } else {
                Err(VmError::VmNotCreated)
            }
        };
        tracer::end();
        if r.is_ok() {
            event!("vm", "booted");
        }
        r
    }

    fn vm_pause(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.pause().map_err(VmError::Pause)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_resume(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.resume().map_err(VmError::Resume)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            // Drain console_info so that FDs are not reused
            let _ = self.console_info.take();
            vm.snapshot()
                .map_err(VmError::Snapshot)
                .and_then(|snapshot| {
                    vm.send(&snapshot, destination_url)
                        .map_err(VmError::SnapshotSend)
                })
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> {
        if self.vm.is_some() || self.vm_config.is_some() {
            return Err(VmError::VmAlreadyCreated);
        }

        let source_url = restore_cfg.source_url.as_path().to_str();
        if source_url.is_none() {
            return Err(VmError::InvalidRestoreSourceUrl);
        }
        // Safe to unwrap as we checked it was Some(&str).
        let source_url = source_url.unwrap();

        let vm_config = Arc::new(Mutex::new(
            recv_vm_config(source_url).map_err(VmError::Restore)?,
        ));
        restore_cfg
            .validate(&vm_config.lock().unwrap().clone())
            .map_err(VmError::ConfigValidation)?;

        // Update the VM's net configurations with the new fds received for the restore operation
        if let (Some(restored_nets), Some(vm_net_configs)) =
            (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net)
        {
            for net in restored_nets.iter() {
                for net_config in vm_net_configs.iter_mut() {
                    // Update only if the net device is backed by FDs
                    if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() {
                        net_config.fds.clone_from(&net.fds);
                    }
                }
            }
        }

        self.vm_restore(source_url, vm_config, restore_cfg.prefault)
            .map_err(|vm_restore_err| {
                error!("VM restore failed: {:?}", vm_restore_err);

                // Clean up the partially created VM if the restore failed
                if let Err(e) = self.vm_delete() {
                    return e;
                }

                vm_restore_err
            })
    }

    #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
    fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.coredump(destination_url).map_err(VmError::Coredump)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_shutdown(&mut self) -> result::Result<(), VmError> {
        let r = if let Some(ref mut vm) = self.vm.take() {
            // Drain console_info so that the FDs are not reused
            let _ = self.console_info.take();
            vm.shutdown()
        } else {
            Err(VmError::VmNotRunning)
        };

        if r.is_ok() {
            event!("vm", "shutdown");
        }

        r
    }

    fn vm_reboot(&mut self) -> result::Result<(), VmError> {
        event!("vm", "rebooting");

        // First we stop the current VM
        let config = if let Some(mut vm) = self.vm.take() {
            let config = vm.get_config();
            vm.shutdown()?;
            config
        } else {
            return Err(VmError::VmNotCreated);
        };

        // vm.shutdown() closes all the console devices, so set console_info to None
        // so that the closed FD #s are not reused.
        let _ = self.console_info.take();

        let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?;
        let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?;
        #[cfg(feature = "guest_debug")]
        let debug_evt = self
            .vm_debug_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;
        let activate_evt = self
            .activate_evt
            .try_clone()
            .map_err(VmError::EventFdClone)?;

        // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be
        // an event sitting in the shared reset_evt. Without doing this we get very early reboots
        // during the boot process.
        if self.reset_evt.read().is_ok() {
            warn!("Spurious second reset event received. Ignoring.");
        }

        self.console_info =
            Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?);

        // Then we create the new VM
        let mut vm = Vm::new(
            config,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            debug_evt,
            &self.seccomp_action,
            self.hypervisor.clone(),
            activate_evt,
            self.console_info.clone(),
            self.console_resize_pipe.clone(),
            Arc::clone(&self.original_termios_opt),
            None,
            None,
            None,
        )?;

        // And we boot it
        vm.boot()?;

        self.vm = Some(vm);

        event!("vm", "rebooted");

        Ok(())
    }

    fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> {
        match &self.vm_config {
            Some(vm_config) => {
                let state = match &self.vm {
                    Some(vm) => vm.get_state()?,
                    None => VmState::Created,
                };
                let config = vm_config.lock().unwrap().clone();

                let mut memory_actual_size = config.memory.total_size();
                if let Some(vm) = &self.vm {
                    memory_actual_size -= vm.balloon_size();
                }

                let device_tree = self
                    .vm
                    .as_ref()
                    .map(|vm| vm.device_tree().lock().unwrap().clone());

                Ok(VmInfoResponse {
                    config: Box::new(config),
                    state,
                    memory_actual_size,
                    device_tree,
                })
            }
            None => Err(VmError::VmNotCreated),
        }
    }

    fn vmm_ping(&self) -> VmmPingResponse {
        let VmmVersionInfo {
            build_version,
            version,
        } = self.version.clone();

        VmmPingResponse {
            build_version,
            version,
            pid: std::process::id() as i64,
            features: feature_list(),
        }
    }
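
    // Illustrative shape of the ping response above once serialized (all
    // values hypothetical):
    //
    //     {"build_version":"...","version":"v41.0","pid":12345,"features":["kvm"]}
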
    fn vm_delete(&mut self) -> result::Result<(), VmError> {
        if self.vm_config.is_none() {
            return Ok(());
        }

        // If a VM is booted, we first try to shut it down.
        if self.vm.is_some() {
            self.vm_shutdown()?;
        }

        self.vm_config = None;

        event!("vm", "deleted");

        Ok(())
    }

    fn vmm_shutdown(&mut self) -> result::Result<(), VmError> {
        self.vm_delete()?;
        event!("vmm", "shutdown");
        Ok(())
    }

    fn vm_resize(
        &mut self,
        desired_vcpus: Option<u8>,
        desired_ram: Option<u64>,
        desired_balloon: Option<u64>,
    ) -> result::Result<(), VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        if let Some(ref mut vm) = self.vm {
            if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) {
                error!("Error when resizing VM: {:?}", e);
                Err(e)
            } else {
                Ok(())
            }
        } else {
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            if let Some(desired_vcpus) = desired_vcpus {
                config.cpus.boot_vcpus = desired_vcpus;
            }
            if let Some(desired_ram) = desired_ram {
                config.memory.size = desired_ram;
            }
            if let Some(desired_balloon) = desired_balloon {
                if let Some(balloon_config) = &mut config.balloon {
                    balloon_config.size = desired_balloon;
                }
            }
            Ok(())
        }
    }

    fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        if let Some(ref mut vm) = self.vm {
            if let Err(e) = vm.resize_zone(id, desired_ram) {
                error!("Error when resizing memory zone: {:?}", e);
                Err(e)
            } else {
                Ok(())
            }
        } else {
            // Update VmConfig by setting the new desired ram.
            let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory;

            if let Some(zones) = &mut memory_config.zones {
                for zone in zones.iter_mut() {
                    if zone.id == id {
                        zone.size = desired_ram;
                        return Ok(());
                    }
                }
            }

            error!("Could not find the memory zone {} for the resize", id);
            Err(VmError::ResizeZone)
        }
    }
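
    // The vm_add_* handlers below share one pattern: validate the change
    // against a clone of the stored config first, then either hot-plug the
    // device into the running VM (returning its serialized PciDeviceInfo),
    // or, when no VM is running yet, persist the change into the VmConfig so
    // it takes effect at boot.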
    fn vm_add_device(
        &mut self,
        device_cfg: DeviceConfig,
    ) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.devices, device_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_device(device_cfg).map_err(|e| {
                error!("Error when adding new device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.devices, device_cfg);
            Ok(None)
        }
    }

    fn vm_add_user_device(
        &mut self,
        device_cfg: UserDeviceConfig,
    ) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.user_devices, device_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_user_device(device_cfg).map_err(|e| {
                error!("Error when adding new user device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.user_devices, device_cfg);
            Ok(None)
        }
    }

    fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            if let Err(e) = vm.remove_device(id) {
                error!("Error when removing device from the VM: {:?}", e);
                Err(e)
            } else {
                Ok(())
            }
        } else if let Some(ref config) = self.vm_config {
            let mut config = config.lock().unwrap();
            if config.remove_device(&id) {
                Ok(())
            } else {
                Err(VmError::NoDeviceToRemove(id))
            }
        } else {
            Err(VmError::VmNotCreated)
        }
    }

    fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.disks, disk_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_disk(disk_cfg).map_err(|e| {
                error!("Error when adding new disk to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.disks, disk_cfg);
            Ok(None)
        }
    }
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.fs, fs_cfg);
            Ok(None)
        }
    }

    fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.pmem, pmem_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_pmem(pmem_cfg).map_err(|e| {
                error!("Error when adding new pmem device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new pmem device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.pmem, pmem_cfg);
            Ok(None)
        }
    }

    fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.net, net_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_net(net_cfg).map_err(|e| {
                error!("Error when adding new network device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new network device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.net, net_cfg);
            Ok(None)
        }
    }

    fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
            add_to_config(&mut config.vdpa, vdpa_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_vdpa(vdpa_cfg).map_err(|e| {
                error!("Error when adding new vDPA device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by adding the new vDPA device.
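            // Validation already ran against a cloned configuration above, so
            // reaching this point means the vDPA entry is known to be valid;
            // a failed validation never touches the stored config.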
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            add_to_config(&mut config.vdpa, vdpa_cfg);
            Ok(None)
        }
    }

    fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> {
        self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

        {
            // Validate the configuration change in a cloned configuration
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();

            if config.vsock.is_some() {
                return Err(VmError::TooManyVsockDevices);
            }

            config.vsock = Some(vsock_cfg.clone());
            config.validate().map_err(VmError::ConfigValidation)?;
        }

        if let Some(ref mut vm) = self.vm {
            let info = vm.add_vsock(vsock_cfg).map_err(|e| {
                error!("Error when adding new vsock device to the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            // Update VmConfig by setting the new vsock device.
            let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
            config.vsock = Some(vsock_cfg);
            Ok(None)
        }
    }

    fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> {
        if let Some(ref mut vm) = self.vm {
            let info = vm.counters().map_err(|e| {
                error!("Error when getting counters from the VM: {:?}", e);
                e
            })?;
            serde_json::to_vec(&info)
                .map(Some)
                .map_err(VmError::SerializeJson)
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_power_button(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.power_button()
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_nmi(&mut self) -> result::Result<(), VmError> {
        if let Some(ref mut vm) = self.vm {
            vm.nmi()
        } else {
            Err(VmError::VmNotRunning)
        }
    }

    fn vm_receive_migration(
        &mut self,
        receive_data_migration: VmReceiveMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Receiving migration: receiver_url = {}",
            receive_data_migration.receiver_url
        );

        // Accept the connection and get the socket
        let mut socket = Vmm::receive_migration_socket(&receive_data_migration.receiver_url)?;

        let mut started = false;
        let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None;
        let mut existing_memory_files = None;
        loop {
            let req = Request::read_from(&mut socket)?;
            match req.command() {
                Command::Invalid => info!("Invalid Command Received"),
                Command::Start => {
                    info!("Start Command Received");
                    started = true;

                    Response::ok().write_to(&mut socket)?;
                }
                Command::Config => {
                    info!("Config Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    memory_manager = Some(self.vm_receive_config(
                        &req,
                        &mut socket,
                        existing_memory_files.take(),
                    )?);
                }
                Command::State => {
                    info!("State Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    if let Some(mm) = memory_manager.take() {
                        self.vm_receive_state(&req, &mut socket, mm)?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
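                // Memory pages may arrive in several batches, so unlike the
                // `State` arm above, this arm only borrows the memory manager
                // rather than taking ownership of it.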
                Command::Memory => {
                    info!("Memory Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }
                    if let Some(mm) = memory_manager.as_ref() {
                        self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?;
                    } else {
                        warn!("Configuration not sent yet");
                        Response::error().write_to(&mut socket)?;
                    }
                }
                Command::MemoryFd => {
                    info!("MemoryFd Command Received");

                    if !started {
                        warn!("Migration not started yet");
                        Response::error().write_to(&mut socket)?;
                        continue;
                    }

                    match &mut socket {
                        SocketStream::Unix(unix_socket) => {
                            let mut buf = [0u8; 4];
                            let (_, file) = unix_socket.recv_with_fd(&mut buf).map_err(|e| {
                                MigratableError::MigrateReceive(anyhow!(
                                    "Error receiving slot from socket: {}",
                                    e
                                ))
                            })?;

                            if existing_memory_files.is_none() {
                                existing_memory_files = Some(HashMap::default())
                            }

                            if let Some(ref mut existing_memory_files) = existing_memory_files {
                                let slot = u32::from_le_bytes(buf);
                                existing_memory_files.insert(slot, file.unwrap());
                            }

                            Response::ok().write_to(&mut socket)?;
                        }
                        SocketStream::Tcp(_tcp_socket) => {
                            // For TCP sockets, we cannot transfer file descriptors
                            warn!(
                                "MemoryFd command received over TCP socket, which is not supported"
                            );
                            Response::error().write_to(&mut socket)?;
                        }
                    }
                }
                Command::Complete => {
                    info!("Complete Command Received");
                    if let Some(ref mut vm) = self.vm.as_mut() {
                        vm.resume()?;
                        Response::ok().write_to(&mut socket)?;
                    } else {
                        warn!("VM not created yet");
                        Response::error().write_to(&mut socket)?;
                    }
                    break;
                }
                Command::Abandon => {
                    info!("Abandon Command Received");
                    self.vm = None;
                    self.vm_config = None;
                    Response::ok().write_to(&mut socket).ok();
                    break;
                }
            }
        }

        Ok(())
    }

    fn vm_send_migration(
        &mut self,
        send_data_migration: VmSendMigrationData,
    ) -> result::Result<(), MigratableError> {
        info!(
            "Sending migration: destination_url = {}, local = {}",
            send_data_migration.destination_url, send_data_migration.local
        );

        if !self
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .backed_by_shared_memory()
            && send_data_migration.local
        {
            return Err(MigratableError::MigrateSend(anyhow!(
                "Local migration requires shared memory or hugepages enabled"
            )));
        }

        if let Some(vm) = self.vm.as_mut() {
            Self::send_migration(
                vm,
                #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
                self.hypervisor.clone(),
                send_data_migration,
            )
            .map_err(|migration_err| {
                error!("Migration failed: {:?}", migration_err);

                // Stop logging dirty pages
                if let Err(e) = vm.stop_dirty_log() {
                    return e;
                }

                if vm.get_state().unwrap() == VmState::Paused {
                    if let Err(e) = vm.resume() {
                        return e;
                    }
                }

                migration_err
            })?;

            // Shut down the VM after the migration succeeded.
            self.exit_evt.write(1).map_err(|e| {
                MigratableError::MigrateSend(anyhow!(
                    "Failed shutting down the VM after migration: {:?}",
                    e
                ))
            })
        } else {
            Err(MigratableError::MigrateSend(anyhow!("VM is not running")))
        }
    }
}
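
// A rough sketch of the migration exchange accepted by vm_receive_migration()
// above. The sender drives the sequence; `MemoryFd` and `Memory` may repeat,
// and `Abandon` may replace any step:
//
//   sender                              receiver
//   ------                              --------
//   Start        --------------------> mark migration as started
//   (MemoryFd)*  --------------------> collect guest RAM fds (UNIX socket only)
//   Config       --------------------> create the MemoryManager and the VM
//   (Memory)*    --------------------> copy ranges of guest RAM
//   State        --------------------> restore device and vCPU state
//   Complete     --------------------> resume the VM
//
// Identifiers of the snapshot sections used when the VM state is serialized
// for snapshot/restore and live migration.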
"cpu-manager"; 2314 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2315 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2316 2317 #[cfg(test)] 2318 mod unit_tests { 2319 use super::*; 2320 #[cfg(target_arch = "x86_64")] 2321 use crate::vm_config::DebugConsoleConfig; 2322 use crate::vm_config::{ 2323 ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig, 2324 PayloadConfig, RngConfig, 2325 }; 2326 2327 fn create_dummy_vmm() -> Vmm { 2328 Vmm::new( 2329 VmmVersionInfo::new("dummy", "dummy"), 2330 EventFd::new(EFD_NONBLOCK).unwrap(), 2331 #[cfg(feature = "guest_debug")] 2332 EventFd::new(EFD_NONBLOCK).unwrap(), 2333 #[cfg(feature = "guest_debug")] 2334 EventFd::new(EFD_NONBLOCK).unwrap(), 2335 SeccompAction::Allow, 2336 hypervisor::new().unwrap(), 2337 EventFd::new(EFD_NONBLOCK).unwrap(), 2338 ) 2339 .unwrap() 2340 } 2341 2342 fn create_dummy_vm_config() -> Box<VmConfig> { 2343 Box::new(VmConfig { 2344 cpus: CpusConfig { 2345 boot_vcpus: 1, 2346 max_vcpus: 1, 2347 topology: None, 2348 kvm_hyperv: false, 2349 max_phys_bits: 46, 2350 affinity: None, 2351 features: CpuFeatures::default(), 2352 }, 2353 memory: MemoryConfig { 2354 size: 536_870_912, 2355 mergeable: false, 2356 hotplug_method: HotplugMethod::Acpi, 2357 hotplug_size: None, 2358 hotplugged_size: None, 2359 shared: true, 2360 hugepages: false, 2361 hugepage_size: None, 2362 prefault: false, 2363 zones: None, 2364 thp: true, 2365 }, 2366 payload: Some(PayloadConfig { 2367 kernel: Some(PathBuf::from("/path/to/kernel")), 2368 firmware: None, 2369 cmdline: None, 2370 initramfs: None, 2371 #[cfg(feature = "igvm")] 2372 igvm: None, 2373 #[cfg(feature = "sev_snp")] 2374 host_data: None, 2375 }), 2376 rate_limit_groups: None, 2377 disks: None, 2378 net: None, 2379 rng: RngConfig { 2380 src: PathBuf::from("/dev/urandom"), 2381 iommu: false, 2382 }, 2383 balloon: None, 2384 fs: None, 2385 pmem: None, 2386 serial: ConsoleConfig { 2387 file: None, 2388 mode: ConsoleOutputMode::Null, 2389 iommu: false, 2390 socket: None, 2391 }, 2392 console: ConsoleConfig { 2393 file: None, 2394 mode: ConsoleOutputMode::Tty, 2395 iommu: false, 2396 socket: None, 2397 }, 2398 #[cfg(target_arch = "x86_64")] 2399 debug_console: DebugConsoleConfig::default(), 2400 devices: None, 2401 user_devices: None, 2402 vdpa: None, 2403 vsock: None, 2404 #[cfg(feature = "pvmemcontrol")] 2405 pvmemcontrol: None, 2406 pvpanic: false, 2407 iommu: false, 2408 #[cfg(target_arch = "x86_64")] 2409 sgx_epc: None, 2410 numa: None, 2411 watchdog: false, 2412 #[cfg(feature = "guest_debug")] 2413 gdb: false, 2414 pci_segments: None, 2415 platform: None, 2416 tpm: None, 2417 preserved_fds: None, 2418 landlock_enable: false, 2419 landlock_rules: None, 2420 }) 2421 } 2422 2423 #[test] 2424 fn test_vmm_vm_create() { 2425 let mut vmm = create_dummy_vmm(); 2426 let config = create_dummy_vm_config(); 2427 2428 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2429 assert!(matches!( 2430 vmm.vm_create(config), 2431 Err(VmError::VmAlreadyCreated) 2432 )); 2433 } 2434 2435 #[test] 2436 fn test_vmm_vm_cold_add_device() { 2437 let mut vmm = create_dummy_vmm(); 2438 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2439 2440 assert!(matches!( 2441 vmm.vm_add_device(device_config.clone()), 2442 Err(VmError::VmNotCreated) 2443 )); 2444 2445 let _ = vmm.vm_create(create_dummy_vm_config()); 2446 assert!(vmm 2447 .vm_config 2448 .as_ref() 2449 .unwrap() 2450 .lock() 2451 .unwrap() 2452 .devices 2453 .is_none()); 2454 2455 
        assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .devices
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .devices
                .clone()
                .unwrap()[0],
            device_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_user_device() {
        let mut vmm = create_dummy_vmm();
        let user_device_config =
            UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap();

        assert!(matches!(
            vmm.vm_add_user_device(user_device_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .user_devices
            .is_none());

        assert!(vmm
            .vm_add_user_device(user_device_config.clone())
            .unwrap()
            .is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .user_devices
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .user_devices
                .clone()
                .unwrap()[0],
            user_device_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_disk() {
        let mut vmm = create_dummy_vmm();
        let disk_config = DiskConfig::parse("path=/path/to_file").unwrap();

        assert!(matches!(
            vmm.vm_add_disk(disk_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .disks
            .is_none());

        assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .disks
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .disks
                .clone()
                .unwrap()[0],
            disk_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_fs() {
        let mut vmm = create_dummy_vmm();
        let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap();

        assert!(matches!(
            vmm.vm_add_fs(fs_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none());

        assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .fs
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .fs
                .clone()
                .unwrap()[0],
            fs_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_pmem() {
        let mut vmm = create_dummy_vmm();
        let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap();

        assert!(matches!(
            vmm.vm_add_pmem(pmem_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .pmem
            .is_none());

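        // Same cold-plug contract as the other add tests: no info blob is
        // returned and exactly one entry lands in the stored config.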
        assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .pmem
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .pmem
                .clone()
                .unwrap()[0],
            pmem_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_net() {
        let mut vmm = create_dummy_vmm();
        let net_config = NetConfig::parse(
            "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock",
        )
        .unwrap();

        assert!(matches!(
            vmm.vm_add_net(net_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .net
            .is_none());

        assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .net
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .net
                .clone()
                .unwrap()[0],
            net_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_vdpa() {
        let mut vmm = create_dummy_vmm();
        let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap();

        assert!(matches!(
            vmm.vm_add_vdpa(vdpa_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .vdpa
            .is_none());

        assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .vdpa
                .clone()
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .vdpa
                .clone()
                .unwrap()[0],
            vdpa_config
        );
    }

    #[test]
    fn test_vmm_vm_cold_add_vsock() {
        let mut vmm = create_dummy_vmm();
        let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap();

        assert!(matches!(
            vmm.vm_add_vsock(vsock_config.clone()),
            Err(VmError::VmNotCreated)
        ));

        let _ = vmm.vm_create(create_dummy_vm_config());
        assert!(vmm
            .vm_config
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .vsock
            .is_none());

        assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none());
        assert_eq!(
            vmm.vm_config
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .vsock
                .clone()
                .unwrap(),
            vsock_config
        );
    }
}