1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use std::collections::HashMap; 12 use std::fs::File; 13 use std::io::{stdout, Read, Write}; 14 use std::net::{TcpListener, TcpStream}; 15 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 16 use std::os::unix::net::{UnixListener, UnixStream}; 17 use std::panic::AssertUnwindSafe; 18 use std::path::PathBuf; 19 use std::rc::Rc; 20 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 21 use std::sync::{Arc, Mutex}; 22 #[cfg(not(target_arch = "riscv64"))] 23 use std::time::Instant; 24 use std::{io, result, thread}; 25 26 use anyhow::anyhow; 27 #[cfg(feature = "dbus_api")] 28 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 29 use api::http::HttpApiHandle; 30 use console_devices::{pre_create_console_devices, ConsoleInfo}; 31 use landlock::LandlockError; 32 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 33 use memory_manager::MemoryManagerSnapshotData; 34 use pci::PciBdf; 35 use seccompiler::{apply_filter, SeccompAction}; 36 use serde::ser::{SerializeStruct, Serializer}; 37 use serde::{Deserialize, Serialize}; 38 use signal_hook::iterator::{Handle, Signals}; 39 use thiserror::Error; 40 use tracer::trace_scoped; 41 use vm_memory::bitmap::{AtomicBitmap, BitmapSlice}; 42 use vm_memory::{ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile}; 43 use vm_migration::protocol::*; 44 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 45 use vmm_sys_util::eventfd::EventFd; 46 use vmm_sys_util::signal::unblock_signal; 47 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 48 49 use crate::api::{ 50 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 51 VmSendMigrationData, VmmPingResponse, 52 }; 53 use crate::config::{add_to_config, RestoreConfig}; 54 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 55 use crate::coredump::GuestDebuggable; 56 use crate::landlock::Landlock; 57 use crate::memory_manager::MemoryManager; 58 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 59 use crate::migration::get_vm_snapshot; 60 use crate::migration::{recv_vm_config, recv_vm_state}; 61 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 62 use crate::vm::{Error as VmError, Vm, VmState}; 63 use crate::vm_config::{ 64 DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, 65 VmConfig, VsockConfig, 66 }; 67 68 #[cfg(not(target_arch = "riscv64"))] 69 mod acpi; 70 pub mod api; 71 mod clone3; 72 pub mod config; 73 pub mod console_devices; 74 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 75 mod coredump; 76 pub mod cpu; 77 pub mod device_manager; 78 pub mod device_tree; 79 #[cfg(feature = "guest_debug")] 80 mod gdb; 81 #[cfg(feature = "igvm")] 82 mod igvm; 83 pub mod interrupt; 84 pub mod landlock; 85 pub mod memory_manager; 86 pub mod migration; 87 mod pci_segment; 88 pub mod seccomp_filters; 89 mod serial_manager; 90 mod sigwinch_listener; 91 pub mod vm; 92 pub mod vm_config; 93 94 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 95 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 96 97 /// Errors associated with VMM management 98 #[derive(Debug, Error)] 99 pub enum Error { 100 /// API request receive error 101 #[error("Error receiving API request: {0}")] 102 ApiRequestRecv(#[source] RecvError), 103 104 /// API response send error 105 #[error("Error sending API request: {0}")] 106 ApiResponseSend(#[source] SendError<ApiResponse>), 107 108 /// Cannot bind to the UNIX domain socket path 109 #[error("Error binding to UNIX domain socket: {0}")] 110 Bind(#[source] io::Error), 111 112 /// Cannot clone EventFd. 113 #[error("Error cloning EventFd: {0}")] 114 EventFdClone(#[source] io::Error), 115 116 /// Cannot create EventFd. 117 #[error("Error creating EventFd: {0}")] 118 EventFdCreate(#[source] io::Error), 119 120 /// Cannot read from EventFd. 121 #[error("Error reading from EventFd: {0}")] 122 EventFdRead(#[source] io::Error), 123 124 /// Cannot create epoll context. 125 #[error("Error creating epoll context: {0}")] 126 Epoll(#[source] io::Error), 127 128 /// Cannot create HTTP thread 129 #[error("Error spawning HTTP thread: {0}")] 130 HttpThreadSpawn(#[source] io::Error), 131 132 /// Cannot create D-Bus thread 133 #[cfg(feature = "dbus_api")] 134 #[error("Error spawning D-Bus thread: {0}")] 135 DBusThreadSpawn(#[source] io::Error), 136 137 /// Cannot start D-Bus session 138 #[cfg(feature = "dbus_api")] 139 #[error("Error starting D-Bus session: {0}")] 140 CreateDBusSession(#[source] zbus::Error), 141 142 /// Cannot create `event-monitor` thread 143 #[error("Error spawning `event-monitor` thread: {0}")] 144 EventMonitorThreadSpawn(#[source] io::Error), 145 146 /// Cannot handle the VM STDIN stream 147 #[error("Error handling VM stdin: {0:?}")] 148 Stdin(VmError), 149 150 /// Cannot handle the VM pty stream 151 #[error("Error handling VM pty: {0:?}")] 152 Pty(VmError), 153 154 /// Cannot reboot the VM 155 #[error("Error rebooting VM: {0:?}")] 156 VmReboot(VmError), 157 158 /// Cannot create VMM thread 159 #[error("Error spawning VMM thread {0:?}")] 160 VmmThreadSpawn(#[source] io::Error), 161 162 /// Cannot shut the VMM down 163 #[error("Error shutting down VMM: {0:?}")] 164 VmmShutdown(VmError), 165 166 /// Cannot create seccomp filter 167 #[error("Error creating seccomp filter: {0}")] 168 CreateSeccompFilter(seccompiler::Error), 169 170 /// Cannot apply seccomp filter 171 #[error("Error applying seccomp filter: {0}")] 172 ApplySeccompFilter(seccompiler::Error), 173 174 /// Error activating virtio devices 175 #[error("Error activating virtio devices: {0:?}")] 176 ActivateVirtioDevices(VmError), 177 178 /// Error creating API server 179 #[error("Error creating API server {0:?}")] 180 CreateApiServer(micro_http::ServerError), 181 182 /// Error binding API server socket 183 #[error("Error creation API server's socket {0:?}")] 184 CreateApiServerSocket(#[source] io::Error), 185 186 #[cfg(feature = "guest_debug")] 187 #[error("Failed to start the GDB thread: {0}")] 188 GdbThreadSpawn(io::Error), 189 190 /// GDB request receive error 191 #[cfg(feature = "guest_debug")] 192 #[error("Error receiving GDB request: {0}")] 193 GdbRequestRecv(#[source] RecvError), 194 195 /// GDB response send error 196 #[cfg(feature = "guest_debug")] 197 #[error("Error sending GDB request: {0}")] 198 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 199 200 #[error("Cannot spawn a signal handler thread: {0}")] 201 SignalHandlerSpawn(#[source] io::Error), 202 203 #[error("Failed to join on threads: {0:?}")] 204 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 205 206 /// Cannot create Landlock object 207 #[error("Error creating landlock object: {0}")] 208 CreateLandlock(LandlockError), 209 210 /// Cannot apply landlock based sandboxing 211 #[error("Error applying landlock: {0}")] 212 ApplyLandlock(LandlockError), 213 } 214 pub type Result<T> = result::Result<T, Error>; 215 216 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 217 #[repr(u64)] 218 pub enum EpollDispatch { 219 Exit = 0, 220 Reset = 1, 221 Api = 2, 222 ActivateVirtioDevices = 3, 223 Debug = 4, 224 Unknown, 225 } 226 227 impl From<u64> for EpollDispatch { 228 fn from(v: u64) -> Self { 229 use EpollDispatch::*; 230 match v { 231 0 => Exit, 232 1 => Reset, 233 2 => Api, 234 3 => ActivateVirtioDevices, 235 4 => Debug, 236 _ => Unknown, 237 } 238 } 239 } 240 241 enum SocketStream { 242 Unix(UnixStream), 243 Tcp(TcpStream), 244 } 245 246 impl Read for SocketStream { 247 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { 248 match self { 249 SocketStream::Unix(stream) => stream.read(buf), 250 SocketStream::Tcp(stream) => stream.read(buf), 251 } 252 } 253 } 254 255 impl Write for SocketStream { 256 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { 257 match self { 258 SocketStream::Unix(stream) => stream.write(buf), 259 SocketStream::Tcp(stream) => stream.write(buf), 260 } 261 } 262 263 fn flush(&mut self) -> std::io::Result<()> { 264 match self { 265 SocketStream::Unix(stream) => stream.flush(), 266 SocketStream::Tcp(stream) => stream.flush(), 267 } 268 } 269 } 270 271 impl AsRawFd for SocketStream { 272 fn as_raw_fd(&self) -> RawFd { 273 match self { 274 SocketStream::Unix(s) => s.as_raw_fd(), 275 SocketStream::Tcp(s) => s.as_raw_fd(), 276 } 277 } 278 } 279 280 impl ReadVolatile for SocketStream { 281 fn read_volatile<B: BitmapSlice>( 282 &mut self, 283 buf: &mut VolatileSlice<B>, 284 ) -> std::result::Result<usize, VolatileMemoryError> { 285 match self { 286 SocketStream::Unix(s) => s.read_volatile(buf), 287 SocketStream::Tcp(s) => s.read_volatile(buf), 288 } 289 } 290 291 fn read_exact_volatile<B: BitmapSlice>( 292 &mut self, 293 buf: &mut VolatileSlice<B>, 294 ) -> std::result::Result<(), VolatileMemoryError> { 295 match self { 296 SocketStream::Unix(s) => s.read_exact_volatile(buf), 297 SocketStream::Tcp(s) => s.read_exact_volatile(buf), 298 } 299 } 300 } 301 302 impl WriteVolatile for SocketStream { 303 fn write_volatile<B: BitmapSlice>( 304 &mut self, 305 buf: &VolatileSlice<B>, 306 ) -> std::result::Result<usize, VolatileMemoryError> { 307 match self { 308 SocketStream::Unix(s) => s.write_volatile(buf), 309 SocketStream::Tcp(s) => s.write_volatile(buf), 310 } 311 } 312 313 fn write_all_volatile<B: BitmapSlice>( 314 &mut self, 315 buf: &VolatileSlice<B>, 316 ) -> std::result::Result<(), VolatileMemoryError> { 317 match self { 318 SocketStream::Unix(s) => s.write_all_volatile(buf), 319 SocketStream::Tcp(s) => s.write_all_volatile(buf), 320 } 321 } 322 } 323 324 pub struct EpollContext { 325 epoll_file: File, 326 } 327 328 impl EpollContext { 329 pub fn new() -> result::Result<EpollContext, io::Error> { 330 let epoll_fd = epoll::create(true)?; 331 // Use 'File' to enforce closing on 'epoll_fd' 332 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 333 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 334 335 Ok(EpollContext { epoll_file }) 336 } 337 338 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 339 where 340 T: AsRawFd, 341 { 342 let dispatch_index = token as u64; 343 epoll::ctl( 344 self.epoll_file.as_raw_fd(), 345 epoll::ControlOptions::EPOLL_CTL_ADD, 346 fd.as_raw_fd(), 347 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 348 )?; 349 350 Ok(()) 351 } 352 353 #[cfg(fuzzing)] 354 pub fn add_event_custom<T>( 355 &mut self, 356 fd: &T, 357 id: u64, 358 evts: epoll::Events, 359 ) -> result::Result<(), io::Error> 360 where 361 T: AsRawFd, 362 { 363 epoll::ctl( 364 self.epoll_file.as_raw_fd(), 365 epoll::ControlOptions::EPOLL_CTL_ADD, 366 fd.as_raw_fd(), 367 epoll::Event::new(evts, id), 368 )?; 369 370 Ok(()) 371 } 372 } 373 374 impl AsRawFd for EpollContext { 375 fn as_raw_fd(&self) -> RawFd { 376 self.epoll_file.as_raw_fd() 377 } 378 } 379 380 pub struct PciDeviceInfo { 381 pub id: String, 382 pub bdf: PciBdf, 383 } 384 385 impl Serialize for PciDeviceInfo { 386 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 387 where 388 S: Serializer, 389 { 390 let bdf_str = self.bdf.to_string(); 391 392 // Serialize the structure. 393 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 394 state.serialize_field("id", &self.id)?; 395 state.serialize_field("bdf", &bdf_str)?; 396 state.end() 397 } 398 } 399 400 pub fn feature_list() -> Vec<String> { 401 vec![ 402 #[cfg(feature = "dbus_api")] 403 "dbus_api".to_string(), 404 #[cfg(feature = "dhat-heap")] 405 "dhat-heap".to_string(), 406 #[cfg(feature = "guest_debug")] 407 "guest_debug".to_string(), 408 #[cfg(feature = "igvm")] 409 "igvm".to_string(), 410 #[cfg(feature = "io_uring")] 411 "io_uring".to_string(), 412 #[cfg(feature = "kvm")] 413 "kvm".to_string(), 414 #[cfg(feature = "mshv")] 415 "mshv".to_string(), 416 #[cfg(feature = "sev_snp")] 417 "sev_snp".to_string(), 418 #[cfg(feature = "tdx")] 419 "tdx".to_string(), 420 #[cfg(feature = "tracing")] 421 "tracing".to_string(), 422 ] 423 } 424 425 pub fn start_event_monitor_thread( 426 mut monitor: event_monitor::Monitor, 427 seccomp_action: &SeccompAction, 428 landlock_enable: bool, 429 hypervisor_type: hypervisor::HypervisorType, 430 exit_event: EventFd, 431 ) -> Result<thread::JoinHandle<Result<()>>> { 432 // Retrieve seccomp filter 433 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 434 .map_err(Error::CreateSeccompFilter)?; 435 436 thread::Builder::new() 437 .name("event-monitor".to_owned()) 438 .spawn(move || { 439 // Apply seccomp filter 440 if !seccomp_filter.is_empty() { 441 apply_filter(&seccomp_filter) 442 .map_err(Error::ApplySeccompFilter) 443 .map_err(|e| { 444 error!("Error applying seccomp filter: {:?}", e); 445 exit_event.write(1).ok(); 446 e 447 })?; 448 } 449 if landlock_enable { 450 Landlock::new() 451 .map_err(Error::CreateLandlock)? 452 .restrict_self() 453 .map_err(Error::ApplyLandlock) 454 .map_err(|e| { 455 error!("Error applying landlock to event monitor thread: {:?}", e); 456 exit_event.write(1).ok(); 457 e 458 })?; 459 } 460 461 std::panic::catch_unwind(AssertUnwindSafe(move || { 462 while let Ok(event) = monitor.rx.recv() { 463 let event = Arc::new(event); 464 465 if let Some(ref mut file) = monitor.file { 466 file.write_all(event.as_bytes().as_ref()).ok(); 467 file.write_all(b"\n\n").ok(); 468 } 469 470 for tx in monitor.broadcast.iter() { 471 tx.send(event.clone()).ok(); 472 } 473 } 474 })) 475 .map_err(|_| { 476 error!("`event-monitor` thread panicked"); 477 exit_event.write(1).ok(); 478 }) 479 .ok(); 480 481 Ok(()) 482 }) 483 .map_err(Error::EventMonitorThreadSpawn) 484 } 485 486 #[allow(unused_variables)] 487 #[allow(clippy::too_many_arguments)] 488 pub fn start_vmm_thread( 489 vmm_version: VmmVersionInfo, 490 http_path: &Option<String>, 491 http_fd: Option<RawFd>, 492 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 493 api_event: EventFd, 494 api_sender: Sender<ApiRequest>, 495 api_receiver: Receiver<ApiRequest>, 496 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 497 #[cfg(feature = "guest_debug")] debug_event: EventFd, 498 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 499 exit_event: EventFd, 500 seccomp_action: &SeccompAction, 501 hypervisor: Arc<dyn hypervisor::Hypervisor>, 502 landlock_enable: bool, 503 ) -> Result<VmmThreadHandle> { 504 #[cfg(feature = "guest_debug")] 505 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 506 #[cfg(feature = "guest_debug")] 507 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 508 #[cfg(feature = "guest_debug")] 509 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 510 #[cfg(feature = "guest_debug")] 511 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 512 513 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 514 let hypervisor_type = hypervisor.hypervisor_type(); 515 516 // Retrieve seccomp filter 517 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 518 .map_err(Error::CreateSeccompFilter)?; 519 520 let vmm_seccomp_action = seccomp_action.clone(); 521 let thread = { 522 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 523 thread::Builder::new() 524 .name("vmm".to_string()) 525 .spawn(move || { 526 // Apply seccomp filter for VMM thread. 527 if !vmm_seccomp_filter.is_empty() { 528 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 529 } 530 531 let mut vmm = Vmm::new( 532 vmm_version, 533 api_event, 534 #[cfg(feature = "guest_debug")] 535 debug_event, 536 #[cfg(feature = "guest_debug")] 537 vm_debug_event, 538 vmm_seccomp_action, 539 hypervisor, 540 exit_event, 541 )?; 542 543 vmm.setup_signal_handler(landlock_enable)?; 544 545 vmm.control_loop( 546 Rc::new(api_receiver), 547 #[cfg(feature = "guest_debug")] 548 Rc::new(gdb_receiver), 549 ) 550 }) 551 .map_err(Error::VmmThreadSpawn)? 552 }; 553 554 // The VMM thread is started, we can start the dbus thread 555 // and start serving HTTP requests 556 #[cfg(feature = "dbus_api")] 557 let dbus_shutdown_chs = match dbus_options { 558 Some(opts) => { 559 let (_, chs) = api::start_dbus_thread( 560 opts, 561 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 562 api_sender.clone(), 563 seccomp_action, 564 exit_event.try_clone().map_err(Error::EventFdClone)?, 565 hypervisor_type, 566 )?; 567 Some(chs) 568 } 569 None => None, 570 }; 571 572 let http_api_handle = if let Some(http_path) = http_path { 573 Some(api::start_http_path_thread( 574 http_path, 575 api_event_clone, 576 api_sender, 577 seccomp_action, 578 exit_event, 579 hypervisor_type, 580 landlock_enable, 581 )?) 582 } else if let Some(http_fd) = http_fd { 583 Some(api::start_http_fd_thread( 584 http_fd, 585 api_event_clone, 586 api_sender, 587 seccomp_action, 588 exit_event, 589 hypervisor_type, 590 landlock_enable, 591 )?) 592 } else { 593 None 594 }; 595 596 #[cfg(feature = "guest_debug")] 597 if let Some(debug_path) = debug_path { 598 let target = gdb::GdbStub::new( 599 gdb_sender, 600 gdb_debug_event, 601 gdb_vm_debug_event, 602 gdb_hw_breakpoints, 603 ); 604 thread::Builder::new() 605 .name("gdb".to_owned()) 606 .spawn(move || gdb::gdb_thread(target, &debug_path)) 607 .map_err(Error::GdbThreadSpawn)?; 608 } 609 610 Ok(VmmThreadHandle { 611 thread_handle: thread, 612 #[cfg(feature = "dbus_api")] 613 dbus_shutdown_chs, 614 http_api_handle, 615 }) 616 } 617 618 #[derive(Clone, Deserialize, Serialize)] 619 struct VmMigrationConfig { 620 vm_config: Arc<Mutex<VmConfig>>, 621 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 622 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 623 memory_manager_data: MemoryManagerSnapshotData, 624 } 625 626 #[derive(Debug, Clone)] 627 pub struct VmmVersionInfo { 628 pub build_version: String, 629 pub version: String, 630 } 631 632 impl VmmVersionInfo { 633 pub fn new(build_version: &str, version: &str) -> Self { 634 Self { 635 build_version: build_version.to_owned(), 636 version: version.to_owned(), 637 } 638 } 639 } 640 641 pub struct VmmThreadHandle { 642 pub thread_handle: thread::JoinHandle<Result<()>>, 643 #[cfg(feature = "dbus_api")] 644 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 645 pub http_api_handle: Option<HttpApiHandle>, 646 } 647 648 pub struct Vmm { 649 epoll: EpollContext, 650 exit_evt: EventFd, 651 reset_evt: EventFd, 652 api_evt: EventFd, 653 #[cfg(feature = "guest_debug")] 654 debug_evt: EventFd, 655 #[cfg(feature = "guest_debug")] 656 vm_debug_evt: EventFd, 657 version: VmmVersionInfo, 658 vm: Option<Vm>, 659 vm_config: Option<Arc<Mutex<VmConfig>>>, 660 seccomp_action: SeccompAction, 661 hypervisor: Arc<dyn hypervisor::Hypervisor>, 662 activate_evt: EventFd, 663 signals: Option<Handle>, 664 threads: Vec<thread::JoinHandle<()>>, 665 original_termios_opt: Arc<Mutex<Option<termios>>>, 666 console_resize_pipe: Option<Arc<File>>, 667 console_info: Option<ConsoleInfo>, 668 } 669 670 impl Vmm { 671 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 672 673 fn signal_handler( 674 mut signals: Signals, 675 original_termios_opt: Arc<Mutex<Option<termios>>>, 676 exit_evt: &EventFd, 677 ) { 678 for sig in &Self::HANDLED_SIGNALS { 679 unblock_signal(*sig).unwrap(); 680 } 681 682 for signal in signals.forever() { 683 match signal { 684 SIGTERM | SIGINT => { 685 if exit_evt.write(1).is_err() { 686 // Resetting the terminal is usually done as the VMM exits 687 if let Ok(lock) = original_termios_opt.lock() { 688 if let Some(termios) = *lock { 689 // SAFETY: FFI call 690 let _ = unsafe { 691 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 692 }; 693 } 694 } else { 695 warn!("Failed to lock original termios"); 696 } 697 698 std::process::exit(1); 699 } 700 } 701 _ => (), 702 } 703 } 704 } 705 706 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 707 let signals = Signals::new(Self::HANDLED_SIGNALS); 708 match signals { 709 Ok(signals) => { 710 self.signals = Some(signals.handle()); 711 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 712 let original_termios_opt = Arc::clone(&self.original_termios_opt); 713 714 let signal_handler_seccomp_filter = get_seccomp_filter( 715 &self.seccomp_action, 716 Thread::SignalHandler, 717 self.hypervisor.hypervisor_type(), 718 ) 719 .map_err(Error::CreateSeccompFilter)?; 720 self.threads.push( 721 thread::Builder::new() 722 .name("vmm_signal_handler".to_string()) 723 .spawn(move || { 724 if !signal_handler_seccomp_filter.is_empty() { 725 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 726 .map_err(Error::ApplySeccompFilter) 727 { 728 error!("Error applying seccomp filter: {:?}", e); 729 exit_evt.write(1).ok(); 730 return; 731 } 732 } 733 if landlock_enable{ 734 match Landlock::new() { 735 Ok(landlock) => { 736 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 737 error!("Error applying Landlock to signal handler thread: {:?}", e); 738 exit_evt.write(1).ok(); 739 }); 740 } 741 Err(e) => { 742 error!("Error creating Landlock object: {:?}", e); 743 exit_evt.write(1).ok(); 744 } 745 }; 746 } 747 748 std::panic::catch_unwind(AssertUnwindSafe(|| { 749 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 750 })) 751 .map_err(|_| { 752 error!("vmm signal_handler thread panicked"); 753 exit_evt.write(1).ok() 754 }) 755 .ok(); 756 }) 757 .map_err(Error::SignalHandlerSpawn)?, 758 ); 759 } 760 Err(e) => error!("Signal not found {}", e), 761 } 762 Ok(()) 763 } 764 765 #[allow(clippy::too_many_arguments)] 766 fn new( 767 vmm_version: VmmVersionInfo, 768 api_evt: EventFd, 769 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 770 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 771 seccomp_action: SeccompAction, 772 hypervisor: Arc<dyn hypervisor::Hypervisor>, 773 exit_evt: EventFd, 774 ) -> Result<Self> { 775 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 776 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 777 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 778 779 epoll 780 .add_event(&exit_evt, EpollDispatch::Exit) 781 .map_err(Error::Epoll)?; 782 783 epoll 784 .add_event(&reset_evt, EpollDispatch::Reset) 785 .map_err(Error::Epoll)?; 786 787 epoll 788 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 789 .map_err(Error::Epoll)?; 790 791 epoll 792 .add_event(&api_evt, EpollDispatch::Api) 793 .map_err(Error::Epoll)?; 794 795 #[cfg(feature = "guest_debug")] 796 epoll 797 .add_event(&debug_evt, EpollDispatch::Debug) 798 .map_err(Error::Epoll)?; 799 800 Ok(Vmm { 801 epoll, 802 exit_evt, 803 reset_evt, 804 api_evt, 805 #[cfg(feature = "guest_debug")] 806 debug_evt, 807 #[cfg(feature = "guest_debug")] 808 vm_debug_evt, 809 version: vmm_version, 810 vm: None, 811 vm_config: None, 812 seccomp_action, 813 hypervisor, 814 activate_evt, 815 signals: None, 816 threads: vec![], 817 original_termios_opt: Arc::new(Mutex::new(None)), 818 console_resize_pipe: None, 819 console_info: None, 820 }) 821 } 822 823 fn vm_receive_config<T>( 824 &mut self, 825 req: &Request, 826 socket: &mut T, 827 existing_memory_files: Option<HashMap<u32, File>>, 828 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 829 where 830 T: Read + Write, 831 { 832 // Read in config data along with memory manager data 833 let mut data: Vec<u8> = Vec::new(); 834 data.resize_with(req.length() as usize, Default::default); 835 socket 836 .read_exact(&mut data) 837 .map_err(MigratableError::MigrateSocket)?; 838 839 let vm_migration_config: VmMigrationConfig = 840 serde_json::from_slice(&data).map_err(|e| { 841 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 842 })?; 843 844 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 845 self.vm_check_cpuid_compatibility( 846 &vm_migration_config.vm_config, 847 &vm_migration_config.common_cpuid, 848 )?; 849 850 let config = vm_migration_config.vm_config.clone(); 851 self.vm_config = Some(vm_migration_config.vm_config); 852 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 853 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 854 })?); 855 856 if self 857 .vm_config 858 .as_ref() 859 .unwrap() 860 .lock() 861 .unwrap() 862 .landlock_enable 863 { 864 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 865 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 866 })?; 867 } 868 869 let vm = Vm::create_hypervisor_vm( 870 &self.hypervisor, 871 #[cfg(feature = "tdx")] 872 false, 873 #[cfg(feature = "sev_snp")] 874 false, 875 #[cfg(feature = "sev_snp")] 876 config.lock().unwrap().memory.total_size(), 877 ) 878 .map_err(|e| { 879 MigratableError::MigrateReceive(anyhow!( 880 "Error creating hypervisor VM from snapshot: {:?}", 881 e 882 )) 883 })?; 884 885 let phys_bits = 886 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 887 888 let memory_manager = MemoryManager::new( 889 vm, 890 &config.lock().unwrap().memory.clone(), 891 None, 892 phys_bits, 893 #[cfg(feature = "tdx")] 894 false, 895 Some(&vm_migration_config.memory_manager_data), 896 existing_memory_files, 897 #[cfg(target_arch = "x86_64")] 898 None, 899 ) 900 .map_err(|e| { 901 MigratableError::MigrateReceive(anyhow!( 902 "Error creating MemoryManager from snapshot: {:?}", 903 e 904 )) 905 })?; 906 907 Response::ok().write_to(socket)?; 908 909 Ok(memory_manager) 910 } 911 912 fn vm_receive_state<T>( 913 &mut self, 914 req: &Request, 915 socket: &mut T, 916 mm: Arc<Mutex<MemoryManager>>, 917 ) -> std::result::Result<(), MigratableError> 918 where 919 T: Read + Write, 920 { 921 // Read in state data 922 let mut data: Vec<u8> = Vec::new(); 923 data.resize_with(req.length() as usize, Default::default); 924 socket 925 .read_exact(&mut data) 926 .map_err(MigratableError::MigrateSocket)?; 927 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 928 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 929 })?; 930 931 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 932 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 933 })?; 934 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 935 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 936 })?; 937 #[cfg(feature = "guest_debug")] 938 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 939 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 940 })?; 941 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 942 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 943 })?; 944 945 #[cfg(not(target_arch = "riscv64"))] 946 let timestamp = Instant::now(); 947 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 948 let mut vm = Vm::new_from_memory_manager( 949 self.vm_config.clone().unwrap(), 950 mm, 951 hypervisor_vm, 952 exit_evt, 953 reset_evt, 954 #[cfg(feature = "guest_debug")] 955 debug_evt, 956 &self.seccomp_action, 957 self.hypervisor.clone(), 958 activate_evt, 959 #[cfg(not(target_arch = "riscv64"))] 960 timestamp, 961 self.console_info.clone(), 962 self.console_resize_pipe.clone(), 963 Arc::clone(&self.original_termios_opt), 964 Some(snapshot), 965 ) 966 .map_err(|e| { 967 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 968 })?; 969 970 // Create VM 971 vm.restore().map_err(|e| { 972 Response::error().write_to(socket).ok(); 973 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 974 })?; 975 self.vm = Some(vm); 976 977 Response::ok().write_to(socket)?; 978 979 Ok(()) 980 } 981 982 fn vm_receive_memory<T>( 983 &mut self, 984 req: &Request, 985 socket: &mut T, 986 memory_manager: &mut MemoryManager, 987 ) -> std::result::Result<(), MigratableError> 988 where 989 T: Read + ReadVolatile + Write, 990 { 991 // Read table 992 let table = MemoryRangeTable::read_from(socket, req.length())?; 993 994 // And then read the memory itself 995 memory_manager 996 .receive_memory_regions(&table, socket) 997 .inspect_err(|_| { 998 Response::error().write_to(socket).ok(); 999 })?; 1000 Response::ok().write_to(socket)?; 1001 Ok(()) 1002 } 1003 1004 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 1005 url.strip_prefix("unix:") 1006 .ok_or_else(|| { 1007 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 1008 }) 1009 .map(|s| s.into()) 1010 } 1011 1012 fn send_migration_socket( 1013 destination_url: &str, 1014 ) -> std::result::Result<SocketStream, MigratableError> { 1015 if let Some(address) = destination_url.strip_prefix("tcp:") { 1016 info!("Connecting to TCP socket at {}", address); 1017 1018 let socket = TcpStream::connect(address).map_err(|e| { 1019 MigratableError::MigrateSend(anyhow!("Error connecting to TCP socket: {}", e)) 1020 })?; 1021 1022 Ok(SocketStream::Tcp(socket)) 1023 } else { 1024 let path = Vmm::socket_url_to_path(destination_url)?; 1025 info!("Connecting to UNIX socket at {:?}", path); 1026 1027 let socket = UnixStream::connect(&path).map_err(|e| { 1028 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 1029 })?; 1030 1031 Ok(SocketStream::Unix(socket)) 1032 } 1033 } 1034 1035 fn receive_migration_socket( 1036 receiver_url: &str, 1037 ) -> std::result::Result<SocketStream, MigratableError> { 1038 if let Some(address) = receiver_url.strip_prefix("tcp:") { 1039 let listener = TcpListener::bind(address).map_err(|e| { 1040 MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) 1041 })?; 1042 1043 let (socket, _addr) = listener.accept().map_err(|e| { 1044 MigratableError::MigrateReceive(anyhow!( 1045 "Error accepting connection on TCP socket: {}", 1046 e 1047 )) 1048 })?; 1049 1050 Ok(SocketStream::Tcp(socket)) 1051 } else { 1052 let path = Vmm::socket_url_to_path(receiver_url)?; 1053 let listener = UnixListener::bind(&path).map_err(|e| { 1054 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1055 })?; 1056 1057 let (socket, _addr) = listener.accept().map_err(|e| { 1058 MigratableError::MigrateReceive(anyhow!( 1059 "Error accepting connection on UNIX socket: {}", 1060 e 1061 )) 1062 })?; 1063 1064 // Remove the UNIX socket file after accepting the connection 1065 std::fs::remove_file(&path).map_err(|e| { 1066 MigratableError::MigrateReceive(anyhow!("Error removing UNIX socket file: {}", e)) 1067 })?; 1068 1069 Ok(SocketStream::Unix(socket)) 1070 } 1071 } 1072 1073 // Returns true if there were dirty pages to send 1074 fn vm_maybe_send_dirty_pages( 1075 vm: &mut Vm, 1076 socket: &mut SocketStream, 1077 ) -> result::Result<bool, MigratableError> { 1078 // Send (dirty) memory table 1079 let table = vm.dirty_log()?; 1080 1081 // But if there are no regions go straight to pause 1082 if table.regions().is_empty() { 1083 return Ok(false); 1084 } 1085 1086 Request::memory(table.length()).write_to(socket).unwrap(); 1087 table.write_to(socket)?; 1088 // And then the memory itself 1089 vm.send_memory_regions(&table, socket)?; 1090 Response::read_from(socket)?.ok_or_abandon( 1091 socket, 1092 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1093 )?; 1094 1095 Ok(true) 1096 } 1097 1098 fn send_migration( 1099 vm: &mut Vm, 1100 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 1101 dyn hypervisor::Hypervisor, 1102 >, 1103 send_data_migration: VmSendMigrationData, 1104 ) -> result::Result<(), MigratableError> { 1105 // Set up the socket connection 1106 let mut socket = Self::send_migration_socket(&send_data_migration.destination_url)?; 1107 1108 // Start the migration 1109 Request::start().write_to(&mut socket)?; 1110 Response::read_from(&mut socket)?.ok_or_abandon( 1111 &mut socket, 1112 MigratableError::MigrateSend(anyhow!("Error starting migration")), 1113 )?; 1114 1115 // Send config 1116 let vm_config = vm.get_config(); 1117 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1118 let common_cpuid = { 1119 #[cfg(feature = "tdx")] 1120 if vm_config.lock().unwrap().is_tdx_enabled() { 1121 return Err(MigratableError::MigrateSend(anyhow!( 1122 "Live Migration is not supported when TDX is enabled" 1123 ))); 1124 }; 1125 1126 let amx = vm_config.lock().unwrap().cpus.features.amx; 1127 let phys_bits = 1128 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 1129 arch::generate_common_cpuid( 1130 &hypervisor, 1131 &arch::CpuidConfig { 1132 sgx_epc_sections: None, 1133 phys_bits, 1134 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 1135 #[cfg(feature = "tdx")] 1136 tdx: false, 1137 amx, 1138 }, 1139 ) 1140 .map_err(|e| { 1141 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 1142 })? 1143 }; 1144 1145 if send_data_migration.local { 1146 match &mut socket { 1147 SocketStream::Unix(unix_socket) => { 1148 // Proceed with sending memory file descriptors over UNIX socket 1149 vm.send_memory_fds(unix_socket)?; 1150 } 1151 SocketStream::Tcp(_tcp_socket) => { 1152 return Err(MigratableError::MigrateSend(anyhow!( 1153 "--local option is not supported with TCP sockets", 1154 ))); 1155 } 1156 } 1157 } 1158 1159 let vm_migration_config = VmMigrationConfig { 1160 vm_config, 1161 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1162 common_cpuid, 1163 memory_manager_data: vm.memory_manager_data(), 1164 }; 1165 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1166 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1167 socket 1168 .write_all(&config_data) 1169 .map_err(MigratableError::MigrateSocket)?; 1170 Response::read_from(&mut socket)?.ok_or_abandon( 1171 &mut socket, 1172 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1173 )?; 1174 1175 // Let every Migratable object know about the migration being started. 1176 vm.start_migration()?; 1177 1178 if send_data_migration.local { 1179 // Now pause VM 1180 vm.pause()?; 1181 } else { 1182 // Start logging dirty pages 1183 vm.start_dirty_log()?; 1184 1185 // Send memory table 1186 let table = vm.memory_range_table()?; 1187 Request::memory(table.length()) 1188 .write_to(&mut socket) 1189 .unwrap(); 1190 table.write_to(&mut socket)?; 1191 // And then the memory itself 1192 vm.send_memory_regions(&table, &mut socket)?; 1193 Response::read_from(&mut socket)?.ok_or_abandon( 1194 &mut socket, 1195 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1196 )?; 1197 1198 // Try at most 5 passes of dirty memory sending 1199 const MAX_DIRTY_MIGRATIONS: usize = 5; 1200 for i in 0..MAX_DIRTY_MIGRATIONS { 1201 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1202 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1203 break; 1204 } 1205 } 1206 1207 // Now pause VM 1208 vm.pause()?; 1209 1210 // Send last batch of dirty pages 1211 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1212 } 1213 // Capture snapshot and send it 1214 let vm_snapshot = vm.snapshot()?; 1215 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1216 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1217 socket 1218 .write_all(&snapshot_data) 1219 .map_err(MigratableError::MigrateSocket)?; 1220 Response::read_from(&mut socket)?.ok_or_abandon( 1221 &mut socket, 1222 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1223 )?; 1224 // Complete the migration 1225 Request::complete().write_to(&mut socket)?; 1226 Response::read_from(&mut socket)?.ok_or_abandon( 1227 &mut socket, 1228 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1229 )?; 1230 1231 // Stop logging dirty pages 1232 if !send_data_migration.local { 1233 vm.stop_dirty_log()?; 1234 } 1235 1236 info!("Migration complete"); 1237 1238 // Let every Migratable object know about the migration being complete 1239 vm.complete_migration() 1240 } 1241 1242 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1243 fn vm_check_cpuid_compatibility( 1244 &self, 1245 src_vm_config: &Arc<Mutex<VmConfig>>, 1246 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1247 ) -> result::Result<(), MigratableError> { 1248 #[cfg(feature = "tdx")] 1249 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1250 return Err(MigratableError::MigrateReceive(anyhow!( 1251 "Live Migration is not supported when TDX is enabled" 1252 ))); 1253 }; 1254 1255 // We check the `CPUID` compatibility of between the source vm and destination, which is 1256 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1257 let dest_cpuid = &{ 1258 let vm_config = &src_vm_config.lock().unwrap(); 1259 1260 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1261 arch::generate_common_cpuid( 1262 &self.hypervisor.clone(), 1263 &arch::CpuidConfig { 1264 sgx_epc_sections: None, 1265 phys_bits, 1266 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1267 #[cfg(feature = "tdx")] 1268 tdx: false, 1269 amx: vm_config.cpus.features.amx, 1270 }, 1271 ) 1272 .map_err(|e| { 1273 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1274 })? 1275 }; 1276 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1277 MigratableError::MigrateReceive(anyhow!( 1278 "Error checking cpu feature compatibility': {:?}", 1279 e 1280 )) 1281 }) 1282 } 1283 1284 fn vm_restore( 1285 &mut self, 1286 source_url: &str, 1287 vm_config: Arc<Mutex<VmConfig>>, 1288 prefault: bool, 1289 ) -> std::result::Result<(), VmError> { 1290 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1291 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1292 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1293 1294 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1295 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1296 .map_err(VmError::Restore)?; 1297 1298 self.vm_config = Some(Arc::clone(&vm_config)); 1299 1300 // Always re-populate the 'console_info' based on the new 'vm_config' 1301 self.console_info = 1302 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1303 1304 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1305 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1306 #[cfg(feature = "guest_debug")] 1307 let debug_evt = self 1308 .vm_debug_evt 1309 .try_clone() 1310 .map_err(VmError::EventFdClone)?; 1311 let activate_evt = self 1312 .activate_evt 1313 .try_clone() 1314 .map_err(VmError::EventFdClone)?; 1315 1316 let vm = Vm::new( 1317 vm_config, 1318 exit_evt, 1319 reset_evt, 1320 #[cfg(feature = "guest_debug")] 1321 debug_evt, 1322 &self.seccomp_action, 1323 self.hypervisor.clone(), 1324 activate_evt, 1325 self.console_info.clone(), 1326 self.console_resize_pipe.clone(), 1327 Arc::clone(&self.original_termios_opt), 1328 Some(snapshot), 1329 Some(source_url), 1330 Some(prefault), 1331 )?; 1332 self.vm = Some(vm); 1333 1334 if self 1335 .vm_config 1336 .as_ref() 1337 .unwrap() 1338 .lock() 1339 .unwrap() 1340 .landlock_enable 1341 { 1342 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1343 .map_err(VmError::ApplyLandlock)?; 1344 } 1345 1346 // Now we can restore the rest of the VM. 1347 if let Some(ref mut vm) = self.vm { 1348 vm.restore() 1349 } else { 1350 Err(VmError::VmNotCreated) 1351 } 1352 } 1353 1354 fn control_loop( 1355 &mut self, 1356 api_receiver: Rc<Receiver<ApiRequest>>, 1357 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1358 ) -> Result<()> { 1359 const EPOLL_EVENTS_LEN: usize = 100; 1360 1361 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1362 let epoll_fd = self.epoll.as_raw_fd(); 1363 1364 'outer: loop { 1365 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1366 Ok(res) => res, 1367 Err(e) => { 1368 if e.kind() == io::ErrorKind::Interrupted { 1369 // It's well defined from the epoll_wait() syscall 1370 // documentation that the epoll loop can be interrupted 1371 // before any of the requested events occurred or the 1372 // timeout expired. In both those cases, epoll_wait() 1373 // returns an error of type EINTR, but this should not 1374 // be considered as a regular error. Instead it is more 1375 // appropriate to retry, by calling into epoll_wait(). 1376 continue; 1377 } 1378 return Err(Error::Epoll(e)); 1379 } 1380 }; 1381 1382 for event in events.iter().take(num_events) { 1383 let dispatch_event: EpollDispatch = event.data.into(); 1384 match dispatch_event { 1385 EpollDispatch::Unknown => { 1386 let event = event.data; 1387 warn!("Unknown VMM loop event: {}", event); 1388 } 1389 EpollDispatch::Exit => { 1390 info!("VM exit event"); 1391 // Consume the event. 1392 self.exit_evt.read().map_err(Error::EventFdRead)?; 1393 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1394 1395 break 'outer; 1396 } 1397 EpollDispatch::Reset => { 1398 info!("VM reset event"); 1399 // Consume the event. 1400 self.reset_evt.read().map_err(Error::EventFdRead)?; 1401 self.vm_reboot().map_err(Error::VmReboot)?; 1402 } 1403 EpollDispatch::ActivateVirtioDevices => { 1404 if let Some(ref vm) = self.vm { 1405 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1406 info!( 1407 "Trying to activate pending virtio devices: count = {}", 1408 count 1409 ); 1410 vm.activate_virtio_devices() 1411 .map_err(Error::ActivateVirtioDevices)?; 1412 } 1413 } 1414 EpollDispatch::Api => { 1415 // Consume the events. 1416 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1417 // Read from the API receiver channel 1418 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1419 1420 if api_request(self)? { 1421 break 'outer; 1422 } 1423 } 1424 } 1425 #[cfg(feature = "guest_debug")] 1426 EpollDispatch::Debug => { 1427 // Consume the events. 1428 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1429 // Read from the API receiver channel 1430 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1431 1432 let response = if let Some(ref mut vm) = self.vm { 1433 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1434 } else { 1435 Err(VmError::VmNotRunning) 1436 } 1437 .map_err(gdb::Error::Vm); 1438 1439 gdb_request 1440 .sender 1441 .send(response) 1442 .map_err(Error::GdbResponseSend)?; 1443 } 1444 } 1445 #[cfg(not(feature = "guest_debug"))] 1446 EpollDispatch::Debug => {} 1447 } 1448 } 1449 } 1450 1451 // Trigger the termination of the signal_handler thread 1452 if let Some(signals) = self.signals.take() { 1453 signals.close(); 1454 } 1455 1456 // Wait for all the threads to finish 1457 for thread in self.threads.drain(..) { 1458 thread.join().map_err(Error::ThreadCleanup)? 1459 } 1460 1461 Ok(()) 1462 } 1463 } 1464 1465 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1466 vm_config.lock().unwrap().apply_landlock()?; 1467 Ok(()) 1468 } 1469 1470 impl RequestHandler for Vmm { 1471 fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> { 1472 // We only store the passed VM config. 1473 // The VM will be created when being asked to boot it. 1474 if self.vm_config.is_none() { 1475 self.vm_config = Some(Arc::new(Mutex::new(*config))); 1476 self.console_info = 1477 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1478 1479 if self 1480 .vm_config 1481 .as_ref() 1482 .unwrap() 1483 .lock() 1484 .unwrap() 1485 .landlock_enable 1486 { 1487 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1488 .map_err(VmError::ApplyLandlock)?; 1489 } 1490 Ok(()) 1491 } else { 1492 Err(VmError::VmAlreadyCreated) 1493 } 1494 } 1495 1496 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1497 tracer::start(); 1498 info!("Booting VM"); 1499 event!("vm", "booting"); 1500 let r = { 1501 trace_scoped!("vm_boot"); 1502 // If we don't have a config, we cannot boot a VM. 1503 if self.vm_config.is_none() { 1504 return Err(VmError::VmMissingConfig); 1505 }; 1506 1507 // console_info is set to None in vm_shutdown. re-populate here if empty 1508 if self.console_info.is_none() { 1509 self.console_info = 1510 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1511 } 1512 1513 // Create a new VM if we don't have one yet. 1514 if self.vm.is_none() { 1515 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1516 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1517 #[cfg(feature = "guest_debug")] 1518 let vm_debug_evt = self 1519 .vm_debug_evt 1520 .try_clone() 1521 .map_err(VmError::EventFdClone)?; 1522 let activate_evt = self 1523 .activate_evt 1524 .try_clone() 1525 .map_err(VmError::EventFdClone)?; 1526 1527 if let Some(ref vm_config) = self.vm_config { 1528 let vm = Vm::new( 1529 Arc::clone(vm_config), 1530 exit_evt, 1531 reset_evt, 1532 #[cfg(feature = "guest_debug")] 1533 vm_debug_evt, 1534 &self.seccomp_action, 1535 self.hypervisor.clone(), 1536 activate_evt, 1537 self.console_info.clone(), 1538 self.console_resize_pipe.clone(), 1539 Arc::clone(&self.original_termios_opt), 1540 None, 1541 None, 1542 None, 1543 )?; 1544 1545 self.vm = Some(vm); 1546 } 1547 } 1548 1549 // Now we can boot the VM. 1550 if let Some(ref mut vm) = self.vm { 1551 vm.boot() 1552 } else { 1553 Err(VmError::VmNotCreated) 1554 } 1555 }; 1556 tracer::end(); 1557 if r.is_ok() { 1558 event!("vm", "booted"); 1559 } 1560 r 1561 } 1562 1563 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1564 if let Some(ref mut vm) = self.vm { 1565 vm.pause().map_err(VmError::Pause) 1566 } else { 1567 Err(VmError::VmNotRunning) 1568 } 1569 } 1570 1571 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1572 if let Some(ref mut vm) = self.vm { 1573 vm.resume().map_err(VmError::Resume) 1574 } else { 1575 Err(VmError::VmNotRunning) 1576 } 1577 } 1578 1579 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1580 if let Some(ref mut vm) = self.vm { 1581 // Drain console_info so that FDs are not reused 1582 let _ = self.console_info.take(); 1583 vm.snapshot() 1584 .map_err(VmError::Snapshot) 1585 .and_then(|snapshot| { 1586 vm.send(&snapshot, destination_url) 1587 .map_err(VmError::SnapshotSend) 1588 }) 1589 } else { 1590 Err(VmError::VmNotRunning) 1591 } 1592 } 1593 1594 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1595 if self.vm.is_some() || self.vm_config.is_some() { 1596 return Err(VmError::VmAlreadyCreated); 1597 } 1598 1599 let source_url = restore_cfg.source_url.as_path().to_str(); 1600 if source_url.is_none() { 1601 return Err(VmError::InvalidRestoreSourceUrl); 1602 } 1603 // Safe to unwrap as we checked it was Some(&str). 1604 let source_url = source_url.unwrap(); 1605 1606 let vm_config = Arc::new(Mutex::new( 1607 recv_vm_config(source_url).map_err(VmError::Restore)?, 1608 )); 1609 restore_cfg 1610 .validate(&vm_config.lock().unwrap().clone()) 1611 .map_err(VmError::ConfigValidation)?; 1612 1613 // Update VM's net configurations with new fds received for restore operation 1614 if let (Some(restored_nets), Some(vm_net_configs)) = 1615 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1616 { 1617 for net in restored_nets.iter() { 1618 for net_config in vm_net_configs.iter_mut() { 1619 // update only if the net dev is backed by FDs 1620 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1621 net_config.fds.clone_from(&net.fds); 1622 } 1623 } 1624 } 1625 } 1626 1627 self.vm_restore(source_url, vm_config, restore_cfg.prefault) 1628 .map_err(|vm_restore_err| { 1629 error!("VM Restore failed: {:?}", vm_restore_err); 1630 1631 // Cleanup the VM being created while vm restore 1632 if let Err(e) = self.vm_delete() { 1633 return e; 1634 } 1635 1636 vm_restore_err 1637 }) 1638 } 1639 1640 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1641 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1642 if let Some(ref mut vm) = self.vm { 1643 vm.coredump(destination_url).map_err(VmError::Coredump) 1644 } else { 1645 Err(VmError::VmNotRunning) 1646 } 1647 } 1648 1649 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1650 let r = if let Some(ref mut vm) = self.vm.take() { 1651 // Drain console_info so that the FDs are not reused 1652 let _ = self.console_info.take(); 1653 vm.shutdown() 1654 } else { 1655 Err(VmError::VmNotRunning) 1656 }; 1657 1658 if r.is_ok() { 1659 event!("vm", "shutdown"); 1660 } 1661 1662 r 1663 } 1664 1665 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1666 event!("vm", "rebooting"); 1667 1668 // First we stop the current VM 1669 let config = if let Some(mut vm) = self.vm.take() { 1670 let config = vm.get_config(); 1671 vm.shutdown()?; 1672 config 1673 } else { 1674 return Err(VmError::VmNotCreated); 1675 }; 1676 1677 // vm.shutdown() closes all the console devices, so set console_info to None 1678 // so that the closed FD #s are not reused. 1679 let _ = self.console_info.take(); 1680 1681 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1682 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1683 #[cfg(feature = "guest_debug")] 1684 let debug_evt = self 1685 .vm_debug_evt 1686 .try_clone() 1687 .map_err(VmError::EventFdClone)?; 1688 let activate_evt = self 1689 .activate_evt 1690 .try_clone() 1691 .map_err(VmError::EventFdClone)?; 1692 1693 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1694 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1695 // during the boot process. 1696 if self.reset_evt.read().is_ok() { 1697 warn!("Spurious second reset event received. Ignoring."); 1698 } 1699 1700 self.console_info = 1701 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1702 1703 // Then we create the new VM 1704 let mut vm = Vm::new( 1705 config, 1706 exit_evt, 1707 reset_evt, 1708 #[cfg(feature = "guest_debug")] 1709 debug_evt, 1710 &self.seccomp_action, 1711 self.hypervisor.clone(), 1712 activate_evt, 1713 self.console_info.clone(), 1714 self.console_resize_pipe.clone(), 1715 Arc::clone(&self.original_termios_opt), 1716 None, 1717 None, 1718 None, 1719 )?; 1720 1721 // And we boot it 1722 vm.boot()?; 1723 1724 self.vm = Some(vm); 1725 1726 event!("vm", "rebooted"); 1727 1728 Ok(()) 1729 } 1730 1731 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1732 match &self.vm_config { 1733 Some(vm_config) => { 1734 let state = match &self.vm { 1735 Some(vm) => vm.get_state()?, 1736 None => VmState::Created, 1737 }; 1738 let config = vm_config.lock().unwrap().clone(); 1739 1740 let mut memory_actual_size = config.memory.total_size(); 1741 if let Some(vm) = &self.vm { 1742 memory_actual_size -= vm.balloon_size(); 1743 } 1744 1745 let device_tree = self 1746 .vm 1747 .as_ref() 1748 .map(|vm| vm.device_tree().lock().unwrap().clone()); 1749 1750 Ok(VmInfoResponse { 1751 config: Box::new(config), 1752 state, 1753 memory_actual_size, 1754 device_tree, 1755 }) 1756 } 1757 None => Err(VmError::VmNotCreated), 1758 } 1759 } 1760 1761 fn vmm_ping(&self) -> VmmPingResponse { 1762 let VmmVersionInfo { 1763 build_version, 1764 version, 1765 } = self.version.clone(); 1766 1767 VmmPingResponse { 1768 build_version, 1769 version, 1770 pid: std::process::id() as i64, 1771 features: feature_list(), 1772 } 1773 } 1774 1775 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1776 if self.vm_config.is_none() { 1777 return Ok(()); 1778 } 1779 1780 // If a VM is booted, we first try to shut it down. 1781 if self.vm.is_some() { 1782 self.vm_shutdown()?; 1783 } 1784 1785 self.vm_config = None; 1786 1787 event!("vm", "deleted"); 1788 1789 Ok(()) 1790 } 1791 1792 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1793 self.vm_delete()?; 1794 event!("vmm", "shutdown"); 1795 Ok(()) 1796 } 1797 1798 fn vm_resize( 1799 &mut self, 1800 desired_vcpus: Option<u8>, 1801 desired_ram: Option<u64>, 1802 desired_balloon: Option<u64>, 1803 ) -> result::Result<(), VmError> { 1804 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1805 1806 if let Some(ref mut vm) = self.vm { 1807 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1808 error!("Error when resizing VM: {:?}", e); 1809 Err(e) 1810 } else { 1811 Ok(()) 1812 } 1813 } else { 1814 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1815 if let Some(desired_vcpus) = desired_vcpus { 1816 config.cpus.boot_vcpus = desired_vcpus; 1817 } 1818 if let Some(desired_ram) = desired_ram { 1819 config.memory.size = desired_ram; 1820 } 1821 if let Some(desired_balloon) = desired_balloon { 1822 if let Some(balloon_config) = &mut config.balloon { 1823 balloon_config.size = desired_balloon; 1824 } 1825 } 1826 Ok(()) 1827 } 1828 } 1829 1830 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1831 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1832 1833 if let Some(ref mut vm) = self.vm { 1834 if let Err(e) = vm.resize_zone(id, desired_ram) { 1835 error!("Error when resizing VM: {:?}", e); 1836 Err(e) 1837 } else { 1838 Ok(()) 1839 } 1840 } else { 1841 // Update VmConfig by setting the new desired ram. 1842 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1843 1844 if let Some(zones) = &mut memory_config.zones { 1845 for zone in zones.iter_mut() { 1846 if zone.id == id { 1847 zone.size = desired_ram; 1848 return Ok(()); 1849 } 1850 } 1851 } 1852 1853 error!("Could not find the memory zone {} for the resize", id); 1854 Err(VmError::ResizeZone) 1855 } 1856 } 1857 1858 fn vm_add_device( 1859 &mut self, 1860 device_cfg: DeviceConfig, 1861 ) -> result::Result<Option<Vec<u8>>, VmError> { 1862 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1863 1864 { 1865 // Validate the configuration change in a cloned configuration 1866 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1867 add_to_config(&mut config.devices, device_cfg.clone()); 1868 config.validate().map_err(VmError::ConfigValidation)?; 1869 } 1870 1871 if let Some(ref mut vm) = self.vm { 1872 let info = vm.add_device(device_cfg).map_err(|e| { 1873 error!("Error when adding new device to the VM: {:?}", e); 1874 e 1875 })?; 1876 serde_json::to_vec(&info) 1877 .map(Some) 1878 .map_err(VmError::SerializeJson) 1879 } else { 1880 // Update VmConfig by adding the new device. 1881 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1882 add_to_config(&mut config.devices, device_cfg); 1883 Ok(None) 1884 } 1885 } 1886 1887 fn vm_add_user_device( 1888 &mut self, 1889 device_cfg: UserDeviceConfig, 1890 ) -> result::Result<Option<Vec<u8>>, VmError> { 1891 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1892 1893 { 1894 // Validate the configuration change in a cloned configuration 1895 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1896 add_to_config(&mut config.user_devices, device_cfg.clone()); 1897 config.validate().map_err(VmError::ConfigValidation)?; 1898 } 1899 1900 if let Some(ref mut vm) = self.vm { 1901 let info = vm.add_user_device(device_cfg).map_err(|e| { 1902 error!("Error when adding new user device to the VM: {:?}", e); 1903 e 1904 })?; 1905 serde_json::to_vec(&info) 1906 .map(Some) 1907 .map_err(VmError::SerializeJson) 1908 } else { 1909 // Update VmConfig by adding the new device. 1910 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1911 add_to_config(&mut config.user_devices, device_cfg); 1912 Ok(None) 1913 } 1914 } 1915 1916 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1917 if let Some(ref mut vm) = self.vm { 1918 if let Err(e) = vm.remove_device(id) { 1919 error!("Error when removing device from the VM: {:?}", e); 1920 Err(e) 1921 } else { 1922 Ok(()) 1923 } 1924 } else if let Some(ref config) = self.vm_config { 1925 let mut config = config.lock().unwrap(); 1926 if config.remove_device(&id) { 1927 Ok(()) 1928 } else { 1929 Err(VmError::NoDeviceToRemove(id)) 1930 } 1931 } else { 1932 Err(VmError::VmNotCreated) 1933 } 1934 } 1935 1936 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1937 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1938 1939 { 1940 // Validate the configuration change in a cloned configuration 1941 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1942 add_to_config(&mut config.disks, disk_cfg.clone()); 1943 config.validate().map_err(VmError::ConfigValidation)?; 1944 } 1945 1946 if let Some(ref mut vm) = self.vm { 1947 let info = vm.add_disk(disk_cfg).map_err(|e| { 1948 error!("Error when adding new disk to the VM: {:?}", e); 1949 e 1950 })?; 1951 serde_json::to_vec(&info) 1952 .map(Some) 1953 .map_err(VmError::SerializeJson) 1954 } else { 1955 // Update VmConfig by adding the new device. 1956 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1957 add_to_config(&mut config.disks, disk_cfg); 1958 Ok(None) 1959 } 1960 } 1961 1962 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1963 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1964 1965 { 1966 // Validate the configuration change in a cloned configuration 1967 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1968 add_to_config(&mut config.fs, fs_cfg.clone()); 1969 config.validate().map_err(VmError::ConfigValidation)?; 1970 } 1971 1972 if let Some(ref mut vm) = self.vm { 1973 let info = vm.add_fs(fs_cfg).map_err(|e| { 1974 error!("Error when adding new fs to the VM: {:?}", e); 1975 e 1976 })?; 1977 serde_json::to_vec(&info) 1978 .map(Some) 1979 .map_err(VmError::SerializeJson) 1980 } else { 1981 // Update VmConfig by adding the new device. 1982 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1983 add_to_config(&mut config.fs, fs_cfg); 1984 Ok(None) 1985 } 1986 } 1987 1988 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1989 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1990 1991 { 1992 // Validate the configuration change in a cloned configuration 1993 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1994 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1995 config.validate().map_err(VmError::ConfigValidation)?; 1996 } 1997 1998 if let Some(ref mut vm) = self.vm { 1999 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 2000 error!("Error when adding new pmem device to the VM: {:?}", e); 2001 e 2002 })?; 2003 serde_json::to_vec(&info) 2004 .map(Some) 2005 .map_err(VmError::SerializeJson) 2006 } else { 2007 // Update VmConfig by adding the new device. 2008 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2009 add_to_config(&mut config.pmem, pmem_cfg); 2010 Ok(None) 2011 } 2012 } 2013 2014 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 2015 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 2016 2017 { 2018 // Validate the configuration change in a cloned configuration 2019 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 2020 add_to_config(&mut config.net, net_cfg.clone()); 2021 config.validate().map_err(VmError::ConfigValidation)?; 2022 } 2023 2024 if let Some(ref mut vm) = self.vm { 2025 let info = vm.add_net(net_cfg).map_err(|e| { 2026 error!("Error when adding new network device to the VM: {:?}", e); 2027 e 2028 })?; 2029 serde_json::to_vec(&info) 2030 .map(Some) 2031 .map_err(VmError::SerializeJson) 2032 } else { 2033 // Update VmConfig by adding the new device. 2034 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2035 add_to_config(&mut config.net, net_cfg); 2036 Ok(None) 2037 } 2038 } 2039 2040 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 2041 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 2042 2043 { 2044 // Validate the configuration change in a cloned configuration 2045 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 2046 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 2047 config.validate().map_err(VmError::ConfigValidation)?; 2048 } 2049 2050 if let Some(ref mut vm) = self.vm { 2051 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 2052 error!("Error when adding new vDPA device to the VM: {:?}", e); 2053 e 2054 })?; 2055 serde_json::to_vec(&info) 2056 .map(Some) 2057 .map_err(VmError::SerializeJson) 2058 } else { 2059 // Update VmConfig by adding the new device. 2060 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2061 add_to_config(&mut config.vdpa, vdpa_cfg); 2062 Ok(None) 2063 } 2064 } 2065 2066 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 2067 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 2068 2069 { 2070 // Validate the configuration change in a cloned configuration 2071 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 2072 2073 if config.vsock.is_some() { 2074 return Err(VmError::TooManyVsockDevices); 2075 } 2076 2077 config.vsock = Some(vsock_cfg.clone()); 2078 config.validate().map_err(VmError::ConfigValidation)?; 2079 } 2080 2081 if let Some(ref mut vm) = self.vm { 2082 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 2083 error!("Error when adding new vsock device to the VM: {:?}", e); 2084 e 2085 })?; 2086 serde_json::to_vec(&info) 2087 .map(Some) 2088 .map_err(VmError::SerializeJson) 2089 } else { 2090 // Update VmConfig by adding the new device. 2091 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2092 config.vsock = Some(vsock_cfg); 2093 Ok(None) 2094 } 2095 } 2096 2097 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 2098 if let Some(ref mut vm) = self.vm { 2099 let info = vm.counters().map_err(|e| { 2100 error!("Error when getting counters from the VM: {:?}", e); 2101 e 2102 })?; 2103 serde_json::to_vec(&info) 2104 .map(Some) 2105 .map_err(VmError::SerializeJson) 2106 } else { 2107 Err(VmError::VmNotRunning) 2108 } 2109 } 2110 2111 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 2112 if let Some(ref mut vm) = self.vm { 2113 vm.power_button() 2114 } else { 2115 Err(VmError::VmNotRunning) 2116 } 2117 } 2118 2119 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 2120 if let Some(ref mut vm) = self.vm { 2121 vm.nmi() 2122 } else { 2123 Err(VmError::VmNotRunning) 2124 } 2125 } 2126 2127 fn vm_receive_migration( 2128 &mut self, 2129 receive_data_migration: VmReceiveMigrationData, 2130 ) -> result::Result<(), MigratableError> { 2131 info!( 2132 "Receiving migration: receiver_url = {}", 2133 receive_data_migration.receiver_url 2134 ); 2135 2136 // Accept the connection and get the socket 2137 let mut socket = Vmm::receive_migration_socket(&receive_data_migration.receiver_url)?; 2138 2139 let mut started = false; 2140 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 2141 let mut existing_memory_files = None; 2142 loop { 2143 let req = Request::read_from(&mut socket)?; 2144 match req.command() { 2145 Command::Invalid => info!("Invalid Command Received"), 2146 Command::Start => { 2147 info!("Start Command Received"); 2148 started = true; 2149 2150 Response::ok().write_to(&mut socket)?; 2151 } 2152 Command::Config => { 2153 info!("Config Command Received"); 2154 2155 if !started { 2156 warn!("Migration not started yet"); 2157 Response::error().write_to(&mut socket)?; 2158 continue; 2159 } 2160 memory_manager = Some(self.vm_receive_config( 2161 &req, 2162 &mut socket, 2163 existing_memory_files.take(), 2164 )?); 2165 } 2166 Command::State => { 2167 info!("State Command Received"); 2168 2169 if !started { 2170 warn!("Migration not started yet"); 2171 Response::error().write_to(&mut socket)?; 2172 continue; 2173 } 2174 if let Some(mm) = memory_manager.take() { 2175 self.vm_receive_state(&req, &mut socket, mm)?; 2176 } else { 2177 warn!("Configuration not sent yet"); 2178 Response::error().write_to(&mut socket)?; 2179 } 2180 } 2181 Command::Memory => { 2182 info!("Memory Command Received"); 2183 2184 if !started { 2185 warn!("Migration not started yet"); 2186 Response::error().write_to(&mut socket)?; 2187 continue; 2188 } 2189 if let Some(mm) = memory_manager.as_ref() { 2190 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2191 } else { 2192 warn!("Configuration not sent yet"); 2193 Response::error().write_to(&mut socket)?; 2194 } 2195 } 2196 Command::MemoryFd => { 2197 info!("MemoryFd Command Received"); 2198 2199 if !started { 2200 warn!("Migration not started yet"); 2201 Response::error().write_to(&mut socket)?; 2202 continue; 2203 } 2204 2205 match &mut socket { 2206 SocketStream::Unix(unix_socket) => { 2207 let mut buf = [0u8; 4]; 2208 let (_, file) = unix_socket.recv_with_fd(&mut buf).map_err(|e| { 2209 MigratableError::MigrateReceive(anyhow!( 2210 "Error receiving slot from socket: {}", 2211 e 2212 )) 2213 })?; 2214 2215 if existing_memory_files.is_none() { 2216 existing_memory_files = Some(HashMap::default()) 2217 } 2218 2219 if let Some(ref mut existing_memory_files) = existing_memory_files { 2220 let slot = u32::from_le_bytes(buf); 2221 existing_memory_files.insert(slot, file.unwrap()); 2222 } 2223 2224 Response::ok().write_to(&mut socket)?; 2225 } 2226 SocketStream::Tcp(_tcp_socket) => { 2227 // For TCP sockets, we cannot transfer file descriptors 2228 warn!( 2229 "MemoryFd command received over TCP socket, which is not supported" 2230 ); 2231 Response::error().write_to(&mut socket)?; 2232 } 2233 } 2234 } 2235 Command::Complete => { 2236 info!("Complete Command Received"); 2237 if let Some(ref mut vm) = self.vm.as_mut() { 2238 vm.resume()?; 2239 Response::ok().write_to(&mut socket)?; 2240 } else { 2241 warn!("VM not created yet"); 2242 Response::error().write_to(&mut socket)?; 2243 } 2244 break; 2245 } 2246 Command::Abandon => { 2247 info!("Abandon Command Received"); 2248 self.vm = None; 2249 self.vm_config = None; 2250 Response::ok().write_to(&mut socket).ok(); 2251 break; 2252 } 2253 } 2254 } 2255 2256 Ok(()) 2257 } 2258 2259 fn vm_send_migration( 2260 &mut self, 2261 send_data_migration: VmSendMigrationData, 2262 ) -> result::Result<(), MigratableError> { 2263 info!( 2264 "Sending migration: destination_url = {}, local = {}", 2265 send_data_migration.destination_url, send_data_migration.local 2266 ); 2267 2268 if !self 2269 .vm_config 2270 .as_ref() 2271 .unwrap() 2272 .lock() 2273 .unwrap() 2274 .backed_by_shared_memory() 2275 && send_data_migration.local 2276 { 2277 return Err(MigratableError::MigrateSend(anyhow!( 2278 "Local migration requires shared memory or hugepages enabled" 2279 ))); 2280 } 2281 2282 if let Some(vm) = self.vm.as_mut() { 2283 Self::send_migration( 2284 vm, 2285 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2286 self.hypervisor.clone(), 2287 send_data_migration.clone(), 2288 ) 2289 .map_err(|migration_err| { 2290 error!("Migration failed: {:?}", migration_err); 2291 2292 // Stop logging dirty pages only for non-local migrations 2293 if !send_data_migration.local { 2294 if let Err(e) = vm.stop_dirty_log() { 2295 return e; 2296 } 2297 } 2298 2299 if vm.get_state().unwrap() == VmState::Paused { 2300 if let Err(e) = vm.resume() { 2301 return e; 2302 } 2303 } 2304 2305 migration_err 2306 })?; 2307 2308 // Shutdown the VM after the migration succeeded 2309 self.exit_evt.write(1).map_err(|e| { 2310 MigratableError::MigrateSend(anyhow!( 2311 "Failed shutting down the VM after migration: {:?}", 2312 e 2313 )) 2314 }) 2315 } else { 2316 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2317 } 2318 } 2319 } 2320 2321 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2322 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2323 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2324 2325 #[cfg(test)] 2326 mod unit_tests { 2327 use super::*; 2328 #[cfg(target_arch = "x86_64")] 2329 use crate::vm_config::DebugConsoleConfig; 2330 use crate::vm_config::{ 2331 ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig, 2332 PayloadConfig, RngConfig, 2333 }; 2334 2335 fn create_dummy_vmm() -> Vmm { 2336 Vmm::new( 2337 VmmVersionInfo::new("dummy", "dummy"), 2338 EventFd::new(EFD_NONBLOCK).unwrap(), 2339 #[cfg(feature = "guest_debug")] 2340 EventFd::new(EFD_NONBLOCK).unwrap(), 2341 #[cfg(feature = "guest_debug")] 2342 EventFd::new(EFD_NONBLOCK).unwrap(), 2343 SeccompAction::Allow, 2344 hypervisor::new().unwrap(), 2345 EventFd::new(EFD_NONBLOCK).unwrap(), 2346 ) 2347 .unwrap() 2348 } 2349 2350 fn create_dummy_vm_config() -> Box<VmConfig> { 2351 Box::new(VmConfig { 2352 cpus: CpusConfig { 2353 boot_vcpus: 1, 2354 max_vcpus: 1, 2355 topology: None, 2356 kvm_hyperv: false, 2357 max_phys_bits: 46, 2358 affinity: None, 2359 features: CpuFeatures::default(), 2360 }, 2361 memory: MemoryConfig { 2362 size: 536_870_912, 2363 mergeable: false, 2364 hotplug_method: HotplugMethod::Acpi, 2365 hotplug_size: None, 2366 hotplugged_size: None, 2367 shared: true, 2368 hugepages: false, 2369 hugepage_size: None, 2370 prefault: false, 2371 zones: None, 2372 thp: true, 2373 }, 2374 payload: Some(PayloadConfig { 2375 kernel: Some(PathBuf::from("/path/to/kernel")), 2376 firmware: None, 2377 cmdline: None, 2378 initramfs: None, 2379 #[cfg(feature = "igvm")] 2380 igvm: None, 2381 #[cfg(feature = "sev_snp")] 2382 host_data: None, 2383 }), 2384 rate_limit_groups: None, 2385 disks: None, 2386 net: None, 2387 rng: RngConfig { 2388 src: PathBuf::from("/dev/urandom"), 2389 iommu: false, 2390 }, 2391 balloon: None, 2392 fs: None, 2393 pmem: None, 2394 serial: ConsoleConfig { 2395 file: None, 2396 mode: ConsoleOutputMode::Null, 2397 iommu: false, 2398 socket: None, 2399 }, 2400 console: ConsoleConfig { 2401 file: None, 2402 mode: ConsoleOutputMode::Tty, 2403 iommu: false, 2404 socket: None, 2405 }, 2406 #[cfg(target_arch = "x86_64")] 2407 debug_console: DebugConsoleConfig::default(), 2408 devices: None, 2409 user_devices: None, 2410 vdpa: None, 2411 vsock: None, 2412 #[cfg(feature = "pvmemcontrol")] 2413 pvmemcontrol: None, 2414 pvpanic: false, 2415 iommu: false, 2416 #[cfg(target_arch = "x86_64")] 2417 sgx_epc: None, 2418 numa: None, 2419 watchdog: false, 2420 #[cfg(feature = "guest_debug")] 2421 gdb: false, 2422 pci_segments: None, 2423 platform: None, 2424 tpm: None, 2425 preserved_fds: None, 2426 landlock_enable: false, 2427 landlock_rules: None, 2428 }) 2429 } 2430 2431 #[test] 2432 fn test_vmm_vm_create() { 2433 let mut vmm = create_dummy_vmm(); 2434 let config = create_dummy_vm_config(); 2435 2436 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2437 assert!(matches!( 2438 vmm.vm_create(config), 2439 Err(VmError::VmAlreadyCreated) 2440 )); 2441 } 2442 2443 #[test] 2444 fn test_vmm_vm_cold_add_device() { 2445 let mut vmm = create_dummy_vmm(); 2446 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2447 2448 assert!(matches!( 2449 vmm.vm_add_device(device_config.clone()), 2450 Err(VmError::VmNotCreated) 2451 )); 2452 2453 let _ = vmm.vm_create(create_dummy_vm_config()); 2454 assert!(vmm 2455 .vm_config 2456 .as_ref() 2457 .unwrap() 2458 .lock() 2459 .unwrap() 2460 .devices 2461 .is_none()); 2462 2463 assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none()); 2464 assert_eq!( 2465 vmm.vm_config 2466 .as_ref() 2467 .unwrap() 2468 .lock() 2469 .unwrap() 2470 .devices 2471 .clone() 2472 .unwrap() 2473 .len(), 2474 1 2475 ); 2476 assert_eq!( 2477 vmm.vm_config 2478 .as_ref() 2479 .unwrap() 2480 .lock() 2481 .unwrap() 2482 .devices 2483 .clone() 2484 .unwrap()[0], 2485 device_config 2486 ); 2487 } 2488 2489 #[test] 2490 fn test_vmm_vm_cold_add_user_device() { 2491 let mut vmm = create_dummy_vmm(); 2492 let user_device_config = 2493 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2494 2495 assert!(matches!( 2496 vmm.vm_add_user_device(user_device_config.clone()), 2497 Err(VmError::VmNotCreated) 2498 )); 2499 2500 let _ = vmm.vm_create(create_dummy_vm_config()); 2501 assert!(vmm 2502 .vm_config 2503 .as_ref() 2504 .unwrap() 2505 .lock() 2506 .unwrap() 2507 .user_devices 2508 .is_none()); 2509 2510 assert!(vmm 2511 .vm_add_user_device(user_device_config.clone()) 2512 .unwrap() 2513 .is_none()); 2514 assert_eq!( 2515 vmm.vm_config 2516 .as_ref() 2517 .unwrap() 2518 .lock() 2519 .unwrap() 2520 .user_devices 2521 .clone() 2522 .unwrap() 2523 .len(), 2524 1 2525 ); 2526 assert_eq!( 2527 vmm.vm_config 2528 .as_ref() 2529 .unwrap() 2530 .lock() 2531 .unwrap() 2532 .user_devices 2533 .clone() 2534 .unwrap()[0], 2535 user_device_config 2536 ); 2537 } 2538 2539 #[test] 2540 fn test_vmm_vm_cold_add_disk() { 2541 let mut vmm = create_dummy_vmm(); 2542 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2543 2544 assert!(matches!( 2545 vmm.vm_add_disk(disk_config.clone()), 2546 Err(VmError::VmNotCreated) 2547 )); 2548 2549 let _ = vmm.vm_create(create_dummy_vm_config()); 2550 assert!(vmm 2551 .vm_config 2552 .as_ref() 2553 .unwrap() 2554 .lock() 2555 .unwrap() 2556 .disks 2557 .is_none()); 2558 2559 assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none()); 2560 assert_eq!( 2561 vmm.vm_config 2562 .as_ref() 2563 .unwrap() 2564 .lock() 2565 .unwrap() 2566 .disks 2567 .clone() 2568 .unwrap() 2569 .len(), 2570 1 2571 ); 2572 assert_eq!( 2573 vmm.vm_config 2574 .as_ref() 2575 .unwrap() 2576 .lock() 2577 .unwrap() 2578 .disks 2579 .clone() 2580 .unwrap()[0], 2581 disk_config 2582 ); 2583 } 2584 2585 #[test] 2586 fn test_vmm_vm_cold_add_fs() { 2587 let mut vmm = create_dummy_vmm(); 2588 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2589 2590 assert!(matches!( 2591 vmm.vm_add_fs(fs_config.clone()), 2592 Err(VmError::VmNotCreated) 2593 )); 2594 2595 let _ = vmm.vm_create(create_dummy_vm_config()); 2596 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2597 2598 assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none()); 2599 assert_eq!( 2600 vmm.vm_config 2601 .as_ref() 2602 .unwrap() 2603 .lock() 2604 .unwrap() 2605 .fs 2606 .clone() 2607 .unwrap() 2608 .len(), 2609 1 2610 ); 2611 assert_eq!( 2612 vmm.vm_config 2613 .as_ref() 2614 .unwrap() 2615 .lock() 2616 .unwrap() 2617 .fs 2618 .clone() 2619 .unwrap()[0], 2620 fs_config 2621 ); 2622 } 2623 2624 #[test] 2625 fn test_vmm_vm_cold_add_pmem() { 2626 let mut vmm = create_dummy_vmm(); 2627 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2628 2629 assert!(matches!( 2630 vmm.vm_add_pmem(pmem_config.clone()), 2631 Err(VmError::VmNotCreated) 2632 )); 2633 2634 let _ = vmm.vm_create(create_dummy_vm_config()); 2635 assert!(vmm 2636 .vm_config 2637 .as_ref() 2638 .unwrap() 2639 .lock() 2640 .unwrap() 2641 .pmem 2642 .is_none()); 2643 2644 assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none()); 2645 assert_eq!( 2646 vmm.vm_config 2647 .as_ref() 2648 .unwrap() 2649 .lock() 2650 .unwrap() 2651 .pmem 2652 .clone() 2653 .unwrap() 2654 .len(), 2655 1 2656 ); 2657 assert_eq!( 2658 vmm.vm_config 2659 .as_ref() 2660 .unwrap() 2661 .lock() 2662 .unwrap() 2663 .pmem 2664 .clone() 2665 .unwrap()[0], 2666 pmem_config 2667 ); 2668 } 2669 2670 #[test] 2671 fn test_vmm_vm_cold_add_net() { 2672 let mut vmm = create_dummy_vmm(); 2673 let net_config = NetConfig::parse( 2674 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2675 ) 2676 .unwrap(); 2677 2678 assert!(matches!( 2679 vmm.vm_add_net(net_config.clone()), 2680 Err(VmError::VmNotCreated) 2681 )); 2682 2683 let _ = vmm.vm_create(create_dummy_vm_config()); 2684 assert!(vmm 2685 .vm_config 2686 .as_ref() 2687 .unwrap() 2688 .lock() 2689 .unwrap() 2690 .net 2691 .is_none()); 2692 2693 assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none()); 2694 assert_eq!( 2695 vmm.vm_config 2696 .as_ref() 2697 .unwrap() 2698 .lock() 2699 .unwrap() 2700 .net 2701 .clone() 2702 .unwrap() 2703 .len(), 2704 1 2705 ); 2706 assert_eq!( 2707 vmm.vm_config 2708 .as_ref() 2709 .unwrap() 2710 .lock() 2711 .unwrap() 2712 .net 2713 .clone() 2714 .unwrap()[0], 2715 net_config 2716 ); 2717 } 2718 2719 #[test] 2720 fn test_vmm_vm_cold_add_vdpa() { 2721 let mut vmm = create_dummy_vmm(); 2722 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2723 2724 assert!(matches!( 2725 vmm.vm_add_vdpa(vdpa_config.clone()), 2726 Err(VmError::VmNotCreated) 2727 )); 2728 2729 let _ = vmm.vm_create(create_dummy_vm_config()); 2730 assert!(vmm 2731 .vm_config 2732 .as_ref() 2733 .unwrap() 2734 .lock() 2735 .unwrap() 2736 .vdpa 2737 .is_none()); 2738 2739 assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none()); 2740 assert_eq!( 2741 vmm.vm_config 2742 .as_ref() 2743 .unwrap() 2744 .lock() 2745 .unwrap() 2746 .vdpa 2747 .clone() 2748 .unwrap() 2749 .len(), 2750 1 2751 ); 2752 assert_eq!( 2753 vmm.vm_config 2754 .as_ref() 2755 .unwrap() 2756 .lock() 2757 .unwrap() 2758 .vdpa 2759 .clone() 2760 .unwrap()[0], 2761 vdpa_config 2762 ); 2763 } 2764 2765 #[test] 2766 fn test_vmm_vm_cold_add_vsock() { 2767 let mut vmm = create_dummy_vmm(); 2768 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2769 2770 assert!(matches!( 2771 vmm.vm_add_vsock(vsock_config.clone()), 2772 Err(VmError::VmNotCreated) 2773 )); 2774 2775 let _ = vmm.vm_create(create_dummy_vm_config()); 2776 assert!(vmm 2777 .vm_config 2778 .as_ref() 2779 .unwrap() 2780 .lock() 2781 .unwrap() 2782 .vsock 2783 .is_none()); 2784 2785 assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none()); 2786 assert_eq!( 2787 vmm.vm_config 2788 .as_ref() 2789 .unwrap() 2790 .lock() 2791 .unwrap() 2792 .vsock 2793 .clone() 2794 .unwrap(), 2795 vsock_config 2796 ); 2797 } 2798 } 2799