1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 #[macro_use] 7 extern crate event_monitor; 8 #[macro_use] 9 extern crate log; 10 11 use std::collections::HashMap; 12 use std::fs::File; 13 use std::io::{stdout, Read, Write}; 14 use std::net::{TcpListener, TcpStream}; 15 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 16 use std::os::unix::net::{UnixListener, UnixStream}; 17 use std::panic::AssertUnwindSafe; 18 use std::path::PathBuf; 19 use std::rc::Rc; 20 use std::sync::mpsc::{Receiver, RecvError, SendError, Sender}; 21 use std::sync::{Arc, Mutex}; 22 use std::time::Instant; 23 use std::{io, result, thread}; 24 25 use anyhow::anyhow; 26 #[cfg(feature = "dbus_api")] 27 use api::dbus::{DBusApiOptions, DBusApiShutdownChannels}; 28 use api::http::HttpApiHandle; 29 use console_devices::{pre_create_console_devices, ConsoleInfo}; 30 use landlock::LandlockError; 31 use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW}; 32 use memory_manager::MemoryManagerSnapshotData; 33 use pci::PciBdf; 34 use seccompiler::{apply_filter, SeccompAction}; 35 use serde::ser::{SerializeStruct, Serializer}; 36 use serde::{Deserialize, Serialize}; 37 use signal_hook::iterator::{Handle, Signals}; 38 use thiserror::Error; 39 use tracer::trace_scoped; 40 use vm_memory::bitmap::{AtomicBitmap, BitmapSlice}; 41 use vm_memory::{ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile}; 42 use vm_migration::protocol::*; 43 use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; 44 use vmm_sys_util::eventfd::EventFd; 45 use vmm_sys_util::signal::unblock_signal; 46 use vmm_sys_util::sock_ctrl_msg::ScmSocket; 47 48 use crate::api::{ 49 ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData, 50 VmSendMigrationData, VmmPingResponse, 51 }; 52 use crate::config::{add_to_config, RestoreConfig}; 53 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 54 use crate::coredump::GuestDebuggable; 55 use crate::landlock::Landlock; 56 use crate::memory_manager::MemoryManager; 57 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 58 use crate::migration::get_vm_snapshot; 59 use crate::migration::{recv_vm_config, recv_vm_state}; 60 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 61 use crate::vm::{Error as VmError, Vm, VmState}; 62 use crate::vm_config::{ 63 DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, 64 VmConfig, VsockConfig, 65 }; 66 67 mod acpi; 68 pub mod api; 69 mod clone3; 70 pub mod config; 71 pub mod console_devices; 72 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 73 mod coredump; 74 pub mod cpu; 75 pub mod device_manager; 76 pub mod device_tree; 77 #[cfg(feature = "guest_debug")] 78 mod gdb; 79 #[cfg(feature = "igvm")] 80 mod igvm; 81 pub mod interrupt; 82 pub mod landlock; 83 pub mod memory_manager; 84 pub mod migration; 85 mod pci_segment; 86 pub mod seccomp_filters; 87 mod serial_manager; 88 mod sigwinch_listener; 89 pub mod vm; 90 pub mod vm_config; 91 92 type GuestMemoryMmap = vm_memory::GuestMemoryMmap<AtomicBitmap>; 93 type GuestRegionMmap = vm_memory::GuestRegionMmap<AtomicBitmap>; 94 95 /// Errors associated with VMM management 96 #[derive(Debug, Error)] 97 pub enum Error { 98 /// API request receive error 99 #[error("Error receiving API request: {0}")] 100 ApiRequestRecv(#[source] RecvError), 101 102 /// API response send error 103 #[error("Error sending API request: {0}")] 104 ApiResponseSend(#[source] SendError<ApiResponse>), 105 106 /// Cannot bind to the UNIX domain socket path 107 #[error("Error binding to UNIX domain socket: {0}")] 108 Bind(#[source] io::Error), 109 110 /// Cannot clone EventFd. 111 #[error("Error cloning EventFd: {0}")] 112 EventFdClone(#[source] io::Error), 113 114 /// Cannot create EventFd. 115 #[error("Error creating EventFd: {0}")] 116 EventFdCreate(#[source] io::Error), 117 118 /// Cannot read from EventFd. 119 #[error("Error reading from EventFd: {0}")] 120 EventFdRead(#[source] io::Error), 121 122 /// Cannot create epoll context. 123 #[error("Error creating epoll context: {0}")] 124 Epoll(#[source] io::Error), 125 126 /// Cannot create HTTP thread 127 #[error("Error spawning HTTP thread: {0}")] 128 HttpThreadSpawn(#[source] io::Error), 129 130 /// Cannot create D-Bus thread 131 #[cfg(feature = "dbus_api")] 132 #[error("Error spawning D-Bus thread: {0}")] 133 DBusThreadSpawn(#[source] io::Error), 134 135 /// Cannot start D-Bus session 136 #[cfg(feature = "dbus_api")] 137 #[error("Error starting D-Bus session: {0}")] 138 CreateDBusSession(#[source] zbus::Error), 139 140 /// Cannot create `event-monitor` thread 141 #[error("Error spawning `event-monitor` thread: {0}")] 142 EventMonitorThreadSpawn(#[source] io::Error), 143 144 /// Cannot handle the VM STDIN stream 145 #[error("Error handling VM stdin: {0:?}")] 146 Stdin(VmError), 147 148 /// Cannot handle the VM pty stream 149 #[error("Error handling VM pty: {0:?}")] 150 Pty(VmError), 151 152 /// Cannot reboot the VM 153 #[error("Error rebooting VM: {0:?}")] 154 VmReboot(VmError), 155 156 /// Cannot create VMM thread 157 #[error("Error spawning VMM thread {0:?}")] 158 VmmThreadSpawn(#[source] io::Error), 159 160 /// Cannot shut the VMM down 161 #[error("Error shutting down VMM: {0:?}")] 162 VmmShutdown(VmError), 163 164 /// Cannot create seccomp filter 165 #[error("Error creating seccomp filter: {0}")] 166 CreateSeccompFilter(seccompiler::Error), 167 168 /// Cannot apply seccomp filter 169 #[error("Error applying seccomp filter: {0}")] 170 ApplySeccompFilter(seccompiler::Error), 171 172 /// Error activating virtio devices 173 #[error("Error activating virtio devices: {0:?}")] 174 ActivateVirtioDevices(VmError), 175 176 /// Error creating API server 177 #[error("Error creating API server {0:?}")] 178 CreateApiServer(micro_http::ServerError), 179 180 /// Error binding API server socket 181 #[error("Error creation API server's socket {0:?}")] 182 CreateApiServerSocket(#[source] io::Error), 183 184 #[cfg(feature = "guest_debug")] 185 #[error("Failed to start the GDB thread: {0}")] 186 GdbThreadSpawn(io::Error), 187 188 /// GDB request receive error 189 #[cfg(feature = "guest_debug")] 190 #[error("Error receiving GDB request: {0}")] 191 GdbRequestRecv(#[source] RecvError), 192 193 /// GDB response send error 194 #[cfg(feature = "guest_debug")] 195 #[error("Error sending GDB request: {0}")] 196 GdbResponseSend(#[source] SendError<gdb::GdbResponse>), 197 198 #[error("Cannot spawn a signal handler thread: {0}")] 199 SignalHandlerSpawn(#[source] io::Error), 200 201 #[error("Failed to join on threads: {0:?}")] 202 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 203 204 /// Cannot create Landlock object 205 #[error("Error creating landlock object: {0}")] 206 CreateLandlock(LandlockError), 207 208 /// Cannot apply landlock based sandboxing 209 #[error("Error applying landlock: {0}")] 210 ApplyLandlock(LandlockError), 211 } 212 pub type Result<T> = result::Result<T, Error>; 213 214 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 215 #[repr(u64)] 216 pub enum EpollDispatch { 217 Exit = 0, 218 Reset = 1, 219 Api = 2, 220 ActivateVirtioDevices = 3, 221 Debug = 4, 222 Unknown, 223 } 224 225 impl From<u64> for EpollDispatch { 226 fn from(v: u64) -> Self { 227 use EpollDispatch::*; 228 match v { 229 0 => Exit, 230 1 => Reset, 231 2 => Api, 232 3 => ActivateVirtioDevices, 233 4 => Debug, 234 _ => Unknown, 235 } 236 } 237 } 238 239 enum SocketStream { 240 Unix(UnixStream), 241 Tcp(TcpStream), 242 } 243 244 impl Read for SocketStream { 245 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { 246 match self { 247 SocketStream::Unix(stream) => stream.read(buf), 248 SocketStream::Tcp(stream) => stream.read(buf), 249 } 250 } 251 } 252 253 impl Write for SocketStream { 254 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { 255 match self { 256 SocketStream::Unix(stream) => stream.write(buf), 257 SocketStream::Tcp(stream) => stream.write(buf), 258 } 259 } 260 261 fn flush(&mut self) -> std::io::Result<()> { 262 match self { 263 SocketStream::Unix(stream) => stream.flush(), 264 SocketStream::Tcp(stream) => stream.flush(), 265 } 266 } 267 } 268 269 impl AsRawFd for SocketStream { 270 fn as_raw_fd(&self) -> RawFd { 271 match self { 272 SocketStream::Unix(s) => s.as_raw_fd(), 273 SocketStream::Tcp(s) => s.as_raw_fd(), 274 } 275 } 276 } 277 278 impl ReadVolatile for SocketStream { 279 fn read_volatile<B: BitmapSlice>( 280 &mut self, 281 buf: &mut VolatileSlice<B>, 282 ) -> std::result::Result<usize, VolatileMemoryError> { 283 match self { 284 SocketStream::Unix(s) => s.read_volatile(buf), 285 SocketStream::Tcp(s) => s.read_volatile(buf), 286 } 287 } 288 289 fn read_exact_volatile<B: BitmapSlice>( 290 &mut self, 291 buf: &mut VolatileSlice<B>, 292 ) -> std::result::Result<(), VolatileMemoryError> { 293 match self { 294 SocketStream::Unix(s) => s.read_exact_volatile(buf), 295 SocketStream::Tcp(s) => s.read_exact_volatile(buf), 296 } 297 } 298 } 299 300 impl WriteVolatile for SocketStream { 301 fn write_volatile<B: BitmapSlice>( 302 &mut self, 303 buf: &VolatileSlice<B>, 304 ) -> std::result::Result<usize, VolatileMemoryError> { 305 match self { 306 SocketStream::Unix(s) => s.write_volatile(buf), 307 SocketStream::Tcp(s) => s.write_volatile(buf), 308 } 309 } 310 311 fn write_all_volatile<B: BitmapSlice>( 312 &mut self, 313 buf: &VolatileSlice<B>, 314 ) -> std::result::Result<(), VolatileMemoryError> { 315 match self { 316 SocketStream::Unix(s) => s.write_all_volatile(buf), 317 SocketStream::Tcp(s) => s.write_all_volatile(buf), 318 } 319 } 320 } 321 322 pub struct EpollContext { 323 epoll_file: File, 324 } 325 326 impl EpollContext { 327 pub fn new() -> result::Result<EpollContext, io::Error> { 328 let epoll_fd = epoll::create(true)?; 329 // Use 'File' to enforce closing on 'epoll_fd' 330 // SAFETY: the epoll_fd returned by epoll::create is valid and owned by us. 331 let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; 332 333 Ok(EpollContext { epoll_file }) 334 } 335 336 pub fn add_event<T>(&mut self, fd: &T, token: EpollDispatch) -> result::Result<(), io::Error> 337 where 338 T: AsRawFd, 339 { 340 let dispatch_index = token as u64; 341 epoll::ctl( 342 self.epoll_file.as_raw_fd(), 343 epoll::ControlOptions::EPOLL_CTL_ADD, 344 fd.as_raw_fd(), 345 epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index), 346 )?; 347 348 Ok(()) 349 } 350 351 #[cfg(fuzzing)] 352 pub fn add_event_custom<T>( 353 &mut self, 354 fd: &T, 355 id: u64, 356 evts: epoll::Events, 357 ) -> result::Result<(), io::Error> 358 where 359 T: AsRawFd, 360 { 361 epoll::ctl( 362 self.epoll_file.as_raw_fd(), 363 epoll::ControlOptions::EPOLL_CTL_ADD, 364 fd.as_raw_fd(), 365 epoll::Event::new(evts, id), 366 )?; 367 368 Ok(()) 369 } 370 } 371 372 impl AsRawFd for EpollContext { 373 fn as_raw_fd(&self) -> RawFd { 374 self.epoll_file.as_raw_fd() 375 } 376 } 377 378 pub struct PciDeviceInfo { 379 pub id: String, 380 pub bdf: PciBdf, 381 } 382 383 impl Serialize for PciDeviceInfo { 384 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> 385 where 386 S: Serializer, 387 { 388 let bdf_str = self.bdf.to_string(); 389 390 // Serialize the structure. 391 let mut state = serializer.serialize_struct("PciDeviceInfo", 2)?; 392 state.serialize_field("id", &self.id)?; 393 state.serialize_field("bdf", &bdf_str)?; 394 state.end() 395 } 396 } 397 398 pub fn feature_list() -> Vec<String> { 399 vec![ 400 #[cfg(feature = "dbus_api")] 401 "dbus_api".to_string(), 402 #[cfg(feature = "dhat-heap")] 403 "dhat-heap".to_string(), 404 #[cfg(feature = "guest_debug")] 405 "guest_debug".to_string(), 406 #[cfg(feature = "igvm")] 407 "igvm".to_string(), 408 #[cfg(feature = "io_uring")] 409 "io_uring".to_string(), 410 #[cfg(feature = "kvm")] 411 "kvm".to_string(), 412 #[cfg(feature = "mshv")] 413 "mshv".to_string(), 414 #[cfg(feature = "sev_snp")] 415 "sev_snp".to_string(), 416 #[cfg(feature = "tdx")] 417 "tdx".to_string(), 418 #[cfg(feature = "tracing")] 419 "tracing".to_string(), 420 ] 421 } 422 423 pub fn start_event_monitor_thread( 424 mut monitor: event_monitor::Monitor, 425 seccomp_action: &SeccompAction, 426 landlock_enable: bool, 427 hypervisor_type: hypervisor::HypervisorType, 428 exit_event: EventFd, 429 ) -> Result<thread::JoinHandle<Result<()>>> { 430 // Retrieve seccomp filter 431 let seccomp_filter = get_seccomp_filter(seccomp_action, Thread::EventMonitor, hypervisor_type) 432 .map_err(Error::CreateSeccompFilter)?; 433 434 thread::Builder::new() 435 .name("event-monitor".to_owned()) 436 .spawn(move || { 437 // Apply seccomp filter 438 if !seccomp_filter.is_empty() { 439 apply_filter(&seccomp_filter) 440 .map_err(Error::ApplySeccompFilter) 441 .map_err(|e| { 442 error!("Error applying seccomp filter: {:?}", e); 443 exit_event.write(1).ok(); 444 e 445 })?; 446 } 447 if landlock_enable { 448 Landlock::new() 449 .map_err(Error::CreateLandlock)? 450 .restrict_self() 451 .map_err(Error::ApplyLandlock) 452 .map_err(|e| { 453 error!("Error applying landlock to event monitor thread: {:?}", e); 454 exit_event.write(1).ok(); 455 e 456 })?; 457 } 458 459 std::panic::catch_unwind(AssertUnwindSafe(move || { 460 while let Ok(event) = monitor.rx.recv() { 461 let event = Arc::new(event); 462 463 if let Some(ref mut file) = monitor.file { 464 file.write_all(event.as_bytes().as_ref()).ok(); 465 file.write_all(b"\n\n").ok(); 466 } 467 468 for tx in monitor.broadcast.iter() { 469 tx.send(event.clone()).ok(); 470 } 471 } 472 })) 473 .map_err(|_| { 474 error!("`event-monitor` thread panicked"); 475 exit_event.write(1).ok(); 476 }) 477 .ok(); 478 479 Ok(()) 480 }) 481 .map_err(Error::EventMonitorThreadSpawn) 482 } 483 484 #[allow(unused_variables)] 485 #[allow(clippy::too_many_arguments)] 486 pub fn start_vmm_thread( 487 vmm_version: VmmVersionInfo, 488 http_path: &Option<String>, 489 http_fd: Option<RawFd>, 490 #[cfg(feature = "dbus_api")] dbus_options: Option<DBusApiOptions>, 491 api_event: EventFd, 492 api_sender: Sender<ApiRequest>, 493 api_receiver: Receiver<ApiRequest>, 494 #[cfg(feature = "guest_debug")] debug_path: Option<PathBuf>, 495 #[cfg(feature = "guest_debug")] debug_event: EventFd, 496 #[cfg(feature = "guest_debug")] vm_debug_event: EventFd, 497 exit_event: EventFd, 498 seccomp_action: &SeccompAction, 499 hypervisor: Arc<dyn hypervisor::Hypervisor>, 500 landlock_enable: bool, 501 ) -> Result<VmmThreadHandle> { 502 #[cfg(feature = "guest_debug")] 503 let gdb_hw_breakpoints = hypervisor.get_guest_debug_hw_bps(); 504 #[cfg(feature = "guest_debug")] 505 let (gdb_sender, gdb_receiver) = std::sync::mpsc::channel(); 506 #[cfg(feature = "guest_debug")] 507 let gdb_debug_event = debug_event.try_clone().map_err(Error::EventFdClone)?; 508 #[cfg(feature = "guest_debug")] 509 let gdb_vm_debug_event = vm_debug_event.try_clone().map_err(Error::EventFdClone)?; 510 511 let api_event_clone = api_event.try_clone().map_err(Error::EventFdClone)?; 512 let hypervisor_type = hypervisor.hypervisor_type(); 513 514 // Retrieve seccomp filter 515 let vmm_seccomp_filter = get_seccomp_filter(seccomp_action, Thread::Vmm, hypervisor_type) 516 .map_err(Error::CreateSeccompFilter)?; 517 518 let vmm_seccomp_action = seccomp_action.clone(); 519 let thread = { 520 let exit_event = exit_event.try_clone().map_err(Error::EventFdClone)?; 521 thread::Builder::new() 522 .name("vmm".to_string()) 523 .spawn(move || { 524 // Apply seccomp filter for VMM thread. 525 if !vmm_seccomp_filter.is_empty() { 526 apply_filter(&vmm_seccomp_filter).map_err(Error::ApplySeccompFilter)?; 527 } 528 529 let mut vmm = Vmm::new( 530 vmm_version, 531 api_event, 532 #[cfg(feature = "guest_debug")] 533 debug_event, 534 #[cfg(feature = "guest_debug")] 535 vm_debug_event, 536 vmm_seccomp_action, 537 hypervisor, 538 exit_event, 539 )?; 540 541 vmm.setup_signal_handler(landlock_enable)?; 542 543 vmm.control_loop( 544 Rc::new(api_receiver), 545 #[cfg(feature = "guest_debug")] 546 Rc::new(gdb_receiver), 547 ) 548 }) 549 .map_err(Error::VmmThreadSpawn)? 550 }; 551 552 // The VMM thread is started, we can start the dbus thread 553 // and start serving HTTP requests 554 #[cfg(feature = "dbus_api")] 555 let dbus_shutdown_chs = match dbus_options { 556 Some(opts) => { 557 let (_, chs) = api::start_dbus_thread( 558 opts, 559 api_event_clone.try_clone().map_err(Error::EventFdClone)?, 560 api_sender.clone(), 561 seccomp_action, 562 exit_event.try_clone().map_err(Error::EventFdClone)?, 563 hypervisor_type, 564 )?; 565 Some(chs) 566 } 567 None => None, 568 }; 569 570 let http_api_handle = if let Some(http_path) = http_path { 571 Some(api::start_http_path_thread( 572 http_path, 573 api_event_clone, 574 api_sender, 575 seccomp_action, 576 exit_event, 577 hypervisor_type, 578 landlock_enable, 579 )?) 580 } else if let Some(http_fd) = http_fd { 581 Some(api::start_http_fd_thread( 582 http_fd, 583 api_event_clone, 584 api_sender, 585 seccomp_action, 586 exit_event, 587 hypervisor_type, 588 landlock_enable, 589 )?) 590 } else { 591 None 592 }; 593 594 #[cfg(feature = "guest_debug")] 595 if let Some(debug_path) = debug_path { 596 let target = gdb::GdbStub::new( 597 gdb_sender, 598 gdb_debug_event, 599 gdb_vm_debug_event, 600 gdb_hw_breakpoints, 601 ); 602 thread::Builder::new() 603 .name("gdb".to_owned()) 604 .spawn(move || gdb::gdb_thread(target, &debug_path)) 605 .map_err(Error::GdbThreadSpawn)?; 606 } 607 608 Ok(VmmThreadHandle { 609 thread_handle: thread, 610 #[cfg(feature = "dbus_api")] 611 dbus_shutdown_chs, 612 http_api_handle, 613 }) 614 } 615 616 #[derive(Clone, Deserialize, Serialize)] 617 struct VmMigrationConfig { 618 vm_config: Arc<Mutex<VmConfig>>, 619 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 620 common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>, 621 memory_manager_data: MemoryManagerSnapshotData, 622 } 623 624 #[derive(Debug, Clone)] 625 pub struct VmmVersionInfo { 626 pub build_version: String, 627 pub version: String, 628 } 629 630 impl VmmVersionInfo { 631 pub fn new(build_version: &str, version: &str) -> Self { 632 Self { 633 build_version: build_version.to_owned(), 634 version: version.to_owned(), 635 } 636 } 637 } 638 639 pub struct VmmThreadHandle { 640 pub thread_handle: thread::JoinHandle<Result<()>>, 641 #[cfg(feature = "dbus_api")] 642 pub dbus_shutdown_chs: Option<DBusApiShutdownChannels>, 643 pub http_api_handle: Option<HttpApiHandle>, 644 } 645 646 pub struct Vmm { 647 epoll: EpollContext, 648 exit_evt: EventFd, 649 reset_evt: EventFd, 650 api_evt: EventFd, 651 #[cfg(feature = "guest_debug")] 652 debug_evt: EventFd, 653 #[cfg(feature = "guest_debug")] 654 vm_debug_evt: EventFd, 655 version: VmmVersionInfo, 656 vm: Option<Vm>, 657 vm_config: Option<Arc<Mutex<VmConfig>>>, 658 seccomp_action: SeccompAction, 659 hypervisor: Arc<dyn hypervisor::Hypervisor>, 660 activate_evt: EventFd, 661 signals: Option<Handle>, 662 threads: Vec<thread::JoinHandle<()>>, 663 original_termios_opt: Arc<Mutex<Option<termios>>>, 664 console_resize_pipe: Option<Arc<File>>, 665 console_info: Option<ConsoleInfo>, 666 } 667 668 impl Vmm { 669 pub const HANDLED_SIGNALS: [i32; 2] = [SIGTERM, SIGINT]; 670 671 fn signal_handler( 672 mut signals: Signals, 673 original_termios_opt: Arc<Mutex<Option<termios>>>, 674 exit_evt: &EventFd, 675 ) { 676 for sig in &Self::HANDLED_SIGNALS { 677 unblock_signal(*sig).unwrap(); 678 } 679 680 for signal in signals.forever() { 681 match signal { 682 SIGTERM | SIGINT => { 683 if exit_evt.write(1).is_err() { 684 // Resetting the terminal is usually done as the VMM exits 685 if let Ok(lock) = original_termios_opt.lock() { 686 if let Some(termios) = *lock { 687 // SAFETY: FFI call 688 let _ = unsafe { 689 tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) 690 }; 691 } 692 } else { 693 warn!("Failed to lock original termios"); 694 } 695 696 std::process::exit(1); 697 } 698 } 699 _ => (), 700 } 701 } 702 } 703 704 fn setup_signal_handler(&mut self, landlock_enable: bool) -> Result<()> { 705 let signals = Signals::new(Self::HANDLED_SIGNALS); 706 match signals { 707 Ok(signals) => { 708 self.signals = Some(signals.handle()); 709 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?; 710 let original_termios_opt = Arc::clone(&self.original_termios_opt); 711 712 let signal_handler_seccomp_filter = get_seccomp_filter( 713 &self.seccomp_action, 714 Thread::SignalHandler, 715 self.hypervisor.hypervisor_type(), 716 ) 717 .map_err(Error::CreateSeccompFilter)?; 718 self.threads.push( 719 thread::Builder::new() 720 .name("vmm_signal_handler".to_string()) 721 .spawn(move || { 722 if !signal_handler_seccomp_filter.is_empty() { 723 if let Err(e) = apply_filter(&signal_handler_seccomp_filter) 724 .map_err(Error::ApplySeccompFilter) 725 { 726 error!("Error applying seccomp filter: {:?}", e); 727 exit_evt.write(1).ok(); 728 return; 729 } 730 } 731 if landlock_enable{ 732 match Landlock::new() { 733 Ok(landlock) => { 734 let _ = landlock.restrict_self().map_err(Error::ApplyLandlock).map_err(|e| { 735 error!("Error applying Landlock to signal handler thread: {:?}", e); 736 exit_evt.write(1).ok(); 737 }); 738 } 739 Err(e) => { 740 error!("Error creating Landlock object: {:?}", e); 741 exit_evt.write(1).ok(); 742 } 743 }; 744 } 745 746 std::panic::catch_unwind(AssertUnwindSafe(|| { 747 Vmm::signal_handler(signals, original_termios_opt, &exit_evt); 748 })) 749 .map_err(|_| { 750 error!("vmm signal_handler thread panicked"); 751 exit_evt.write(1).ok() 752 }) 753 .ok(); 754 }) 755 .map_err(Error::SignalHandlerSpawn)?, 756 ); 757 } 758 Err(e) => error!("Signal not found {}", e), 759 } 760 Ok(()) 761 } 762 763 #[allow(clippy::too_many_arguments)] 764 fn new( 765 vmm_version: VmmVersionInfo, 766 api_evt: EventFd, 767 #[cfg(feature = "guest_debug")] debug_evt: EventFd, 768 #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd, 769 seccomp_action: SeccompAction, 770 hypervisor: Arc<dyn hypervisor::Hypervisor>, 771 exit_evt: EventFd, 772 ) -> Result<Self> { 773 let mut epoll = EpollContext::new().map_err(Error::Epoll)?; 774 let reset_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 775 let activate_evt = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; 776 777 epoll 778 .add_event(&exit_evt, EpollDispatch::Exit) 779 .map_err(Error::Epoll)?; 780 781 epoll 782 .add_event(&reset_evt, EpollDispatch::Reset) 783 .map_err(Error::Epoll)?; 784 785 epoll 786 .add_event(&activate_evt, EpollDispatch::ActivateVirtioDevices) 787 .map_err(Error::Epoll)?; 788 789 epoll 790 .add_event(&api_evt, EpollDispatch::Api) 791 .map_err(Error::Epoll)?; 792 793 #[cfg(feature = "guest_debug")] 794 epoll 795 .add_event(&debug_evt, EpollDispatch::Debug) 796 .map_err(Error::Epoll)?; 797 798 Ok(Vmm { 799 epoll, 800 exit_evt, 801 reset_evt, 802 api_evt, 803 #[cfg(feature = "guest_debug")] 804 debug_evt, 805 #[cfg(feature = "guest_debug")] 806 vm_debug_evt, 807 version: vmm_version, 808 vm: None, 809 vm_config: None, 810 seccomp_action, 811 hypervisor, 812 activate_evt, 813 signals: None, 814 threads: vec![], 815 original_termios_opt: Arc::new(Mutex::new(None)), 816 console_resize_pipe: None, 817 console_info: None, 818 }) 819 } 820 821 fn vm_receive_config<T>( 822 &mut self, 823 req: &Request, 824 socket: &mut T, 825 existing_memory_files: Option<HashMap<u32, File>>, 826 ) -> std::result::Result<Arc<Mutex<MemoryManager>>, MigratableError> 827 where 828 T: Read + Write, 829 { 830 // Read in config data along with memory manager data 831 let mut data: Vec<u8> = Vec::new(); 832 data.resize_with(req.length() as usize, Default::default); 833 socket 834 .read_exact(&mut data) 835 .map_err(MigratableError::MigrateSocket)?; 836 837 let vm_migration_config: VmMigrationConfig = 838 serde_json::from_slice(&data).map_err(|e| { 839 MigratableError::MigrateReceive(anyhow!("Error deserialising config: {}", e)) 840 })?; 841 842 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 843 self.vm_check_cpuid_compatibility( 844 &vm_migration_config.vm_config, 845 &vm_migration_config.common_cpuid, 846 )?; 847 848 let config = vm_migration_config.vm_config.clone(); 849 self.vm_config = Some(vm_migration_config.vm_config); 850 self.console_info = Some(pre_create_console_devices(self).map_err(|e| { 851 MigratableError::MigrateReceive(anyhow!("Error creating console devices: {:?}", e)) 852 })?); 853 854 if self 855 .vm_config 856 .as_ref() 857 .unwrap() 858 .lock() 859 .unwrap() 860 .landlock_enable 861 { 862 apply_landlock(self.vm_config.as_ref().unwrap().clone()).map_err(|e| { 863 MigratableError::MigrateReceive(anyhow!("Error applying landlock: {:?}", e)) 864 })?; 865 } 866 867 let vm = Vm::create_hypervisor_vm( 868 &self.hypervisor, 869 #[cfg(feature = "tdx")] 870 false, 871 #[cfg(feature = "sev_snp")] 872 false, 873 #[cfg(feature = "sev_snp")] 874 config.lock().unwrap().memory.total_size(), 875 ) 876 .map_err(|e| { 877 MigratableError::MigrateReceive(anyhow!( 878 "Error creating hypervisor VM from snapshot: {:?}", 879 e 880 )) 881 })?; 882 883 let phys_bits = 884 vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits); 885 886 let memory_manager = MemoryManager::new( 887 vm, 888 &config.lock().unwrap().memory.clone(), 889 None, 890 phys_bits, 891 #[cfg(feature = "tdx")] 892 false, 893 Some(&vm_migration_config.memory_manager_data), 894 existing_memory_files, 895 #[cfg(target_arch = "x86_64")] 896 None, 897 ) 898 .map_err(|e| { 899 MigratableError::MigrateReceive(anyhow!( 900 "Error creating MemoryManager from snapshot: {:?}", 901 e 902 )) 903 })?; 904 905 Response::ok().write_to(socket)?; 906 907 Ok(memory_manager) 908 } 909 910 fn vm_receive_state<T>( 911 &mut self, 912 req: &Request, 913 socket: &mut T, 914 mm: Arc<Mutex<MemoryManager>>, 915 ) -> std::result::Result<(), MigratableError> 916 where 917 T: Read + Write, 918 { 919 // Read in state data 920 let mut data: Vec<u8> = Vec::new(); 921 data.resize_with(req.length() as usize, Default::default); 922 socket 923 .read_exact(&mut data) 924 .map_err(MigratableError::MigrateSocket)?; 925 let snapshot: Snapshot = serde_json::from_slice(&data).map_err(|e| { 926 MigratableError::MigrateReceive(anyhow!("Error deserialising snapshot: {}", e)) 927 })?; 928 929 let exit_evt = self.exit_evt.try_clone().map_err(|e| { 930 MigratableError::MigrateReceive(anyhow!("Error cloning exit EventFd: {}", e)) 931 })?; 932 let reset_evt = self.reset_evt.try_clone().map_err(|e| { 933 MigratableError::MigrateReceive(anyhow!("Error cloning reset EventFd: {}", e)) 934 })?; 935 #[cfg(feature = "guest_debug")] 936 let debug_evt = self.vm_debug_evt.try_clone().map_err(|e| { 937 MigratableError::MigrateReceive(anyhow!("Error cloning debug EventFd: {}", e)) 938 })?; 939 let activate_evt = self.activate_evt.try_clone().map_err(|e| { 940 MigratableError::MigrateReceive(anyhow!("Error cloning activate EventFd: {}", e)) 941 })?; 942 943 let timestamp = Instant::now(); 944 let hypervisor_vm = mm.lock().unwrap().vm.clone(); 945 let mut vm = Vm::new_from_memory_manager( 946 self.vm_config.clone().unwrap(), 947 mm, 948 hypervisor_vm, 949 exit_evt, 950 reset_evt, 951 #[cfg(feature = "guest_debug")] 952 debug_evt, 953 &self.seccomp_action, 954 self.hypervisor.clone(), 955 activate_evt, 956 timestamp, 957 self.console_info.clone(), 958 self.console_resize_pipe.clone(), 959 Arc::clone(&self.original_termios_opt), 960 Some(snapshot), 961 ) 962 .map_err(|e| { 963 MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {:?}", e)) 964 })?; 965 966 // Create VM 967 vm.restore().map_err(|e| { 968 Response::error().write_to(socket).ok(); 969 MigratableError::MigrateReceive(anyhow!("Failed restoring the Vm: {}", e)) 970 })?; 971 self.vm = Some(vm); 972 973 Response::ok().write_to(socket)?; 974 975 Ok(()) 976 } 977 978 fn vm_receive_memory<T>( 979 &mut self, 980 req: &Request, 981 socket: &mut T, 982 memory_manager: &mut MemoryManager, 983 ) -> std::result::Result<(), MigratableError> 984 where 985 T: Read + ReadVolatile + Write, 986 { 987 // Read table 988 let table = MemoryRangeTable::read_from(socket, req.length())?; 989 990 // And then read the memory itself 991 memory_manager 992 .receive_memory_regions(&table, socket) 993 .inspect_err(|_| { 994 Response::error().write_to(socket).ok(); 995 })?; 996 Response::ok().write_to(socket)?; 997 Ok(()) 998 } 999 1000 fn socket_url_to_path(url: &str) -> result::Result<PathBuf, MigratableError> { 1001 url.strip_prefix("unix:") 1002 .ok_or_else(|| { 1003 MigratableError::MigrateSend(anyhow!("Could not extract path from URL: {}", url)) 1004 }) 1005 .map(|s| s.into()) 1006 } 1007 1008 fn send_migration_socket( 1009 destination_url: &str, 1010 ) -> std::result::Result<SocketStream, MigratableError> { 1011 if let Some(address) = destination_url.strip_prefix("tcp:") { 1012 info!("Connecting to TCP socket at {}", address); 1013 1014 let socket = TcpStream::connect(address).map_err(|e| { 1015 MigratableError::MigrateSend(anyhow!("Error connecting to TCP socket: {}", e)) 1016 })?; 1017 1018 Ok(SocketStream::Tcp(socket)) 1019 } else { 1020 let path = Vmm::socket_url_to_path(destination_url)?; 1021 info!("Connecting to UNIX socket at {:?}", path); 1022 1023 let socket = UnixStream::connect(&path).map_err(|e| { 1024 MigratableError::MigrateSend(anyhow!("Error connecting to UNIX socket: {}", e)) 1025 })?; 1026 1027 Ok(SocketStream::Unix(socket)) 1028 } 1029 } 1030 1031 fn receive_migration_socket( 1032 receiver_url: &str, 1033 ) -> std::result::Result<SocketStream, MigratableError> { 1034 if let Some(address) = receiver_url.strip_prefix("tcp:") { 1035 let listener = TcpListener::bind(address).map_err(|e| { 1036 MigratableError::MigrateReceive(anyhow!("Error binding to TCP socket: {}", e)) 1037 })?; 1038 1039 let (socket, _addr) = listener.accept().map_err(|e| { 1040 MigratableError::MigrateReceive(anyhow!( 1041 "Error accepting connection on TCP socket: {}", 1042 e 1043 )) 1044 })?; 1045 1046 Ok(SocketStream::Tcp(socket)) 1047 } else { 1048 let path = Vmm::socket_url_to_path(receiver_url)?; 1049 let listener = UnixListener::bind(&path).map_err(|e| { 1050 MigratableError::MigrateReceive(anyhow!("Error binding to UNIX socket: {}", e)) 1051 })?; 1052 1053 let (socket, _addr) = listener.accept().map_err(|e| { 1054 MigratableError::MigrateReceive(anyhow!( 1055 "Error accepting connection on UNIX socket: {}", 1056 e 1057 )) 1058 })?; 1059 1060 // Remove the UNIX socket file after accepting the connection 1061 std::fs::remove_file(&path).map_err(|e| { 1062 MigratableError::MigrateReceive(anyhow!("Error removing UNIX socket file: {}", e)) 1063 })?; 1064 1065 Ok(SocketStream::Unix(socket)) 1066 } 1067 } 1068 1069 // Returns true if there were dirty pages to send 1070 fn vm_maybe_send_dirty_pages( 1071 vm: &mut Vm, 1072 socket: &mut SocketStream, 1073 ) -> result::Result<bool, MigratableError> { 1074 // Send (dirty) memory table 1075 let table = vm.dirty_log()?; 1076 1077 // But if there are no regions go straight to pause 1078 if table.regions().is_empty() { 1079 return Ok(false); 1080 } 1081 1082 Request::memory(table.length()).write_to(socket).unwrap(); 1083 table.write_to(socket)?; 1084 // And then the memory itself 1085 vm.send_memory_regions(&table, socket)?; 1086 Response::read_from(socket)?.ok_or_abandon( 1087 socket, 1088 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1089 )?; 1090 1091 Ok(true) 1092 } 1093 1094 fn send_migration( 1095 vm: &mut Vm, 1096 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] hypervisor: Arc< 1097 dyn hypervisor::Hypervisor, 1098 >, 1099 send_data_migration: VmSendMigrationData, 1100 ) -> result::Result<(), MigratableError> { 1101 // Set up the socket connection 1102 let mut socket = Self::send_migration_socket(&send_data_migration.destination_url)?; 1103 1104 // Start the migration 1105 Request::start().write_to(&mut socket)?; 1106 Response::read_from(&mut socket)?.ok_or_abandon( 1107 &mut socket, 1108 MigratableError::MigrateSend(anyhow!("Error starting migration")), 1109 )?; 1110 1111 // Send config 1112 let vm_config = vm.get_config(); 1113 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1114 let common_cpuid = { 1115 #[cfg(feature = "tdx")] 1116 if vm_config.lock().unwrap().is_tdx_enabled() { 1117 return Err(MigratableError::MigrateSend(anyhow!( 1118 "Live Migration is not supported when TDX is enabled" 1119 ))); 1120 }; 1121 1122 let amx = vm_config.lock().unwrap().cpus.features.amx; 1123 let phys_bits = 1124 vm::physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits); 1125 arch::generate_common_cpuid( 1126 &hypervisor, 1127 &arch::CpuidConfig { 1128 sgx_epc_sections: None, 1129 phys_bits, 1130 kvm_hyperv: vm_config.lock().unwrap().cpus.kvm_hyperv, 1131 #[cfg(feature = "tdx")] 1132 tdx: false, 1133 amx, 1134 }, 1135 ) 1136 .map_err(|e| { 1137 MigratableError::MigrateSend(anyhow!("Error generating common cpuid': {:?}", e)) 1138 })? 1139 }; 1140 1141 if send_data_migration.local { 1142 match &mut socket { 1143 SocketStream::Unix(unix_socket) => { 1144 // Proceed with sending memory file descriptors over UNIX socket 1145 vm.send_memory_fds(unix_socket)?; 1146 } 1147 SocketStream::Tcp(_tcp_socket) => { 1148 return Err(MigratableError::MigrateSend(anyhow!( 1149 "--local option is not supported with TCP sockets", 1150 ))); 1151 } 1152 } 1153 } 1154 1155 let vm_migration_config = VmMigrationConfig { 1156 vm_config, 1157 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1158 common_cpuid, 1159 memory_manager_data: vm.memory_manager_data(), 1160 }; 1161 let config_data = serde_json::to_vec(&vm_migration_config).unwrap(); 1162 Request::config(config_data.len() as u64).write_to(&mut socket)?; 1163 socket 1164 .write_all(&config_data) 1165 .map_err(MigratableError::MigrateSocket)?; 1166 Response::read_from(&mut socket)?.ok_or_abandon( 1167 &mut socket, 1168 MigratableError::MigrateSend(anyhow!("Error during config migration")), 1169 )?; 1170 1171 // Let every Migratable object know about the migration being started. 1172 vm.start_migration()?; 1173 1174 if send_data_migration.local { 1175 // Now pause VM 1176 vm.pause()?; 1177 } else { 1178 // Start logging dirty pages 1179 vm.start_dirty_log()?; 1180 1181 // Send memory table 1182 let table = vm.memory_range_table()?; 1183 Request::memory(table.length()) 1184 .write_to(&mut socket) 1185 .unwrap(); 1186 table.write_to(&mut socket)?; 1187 // And then the memory itself 1188 vm.send_memory_regions(&table, &mut socket)?; 1189 Response::read_from(&mut socket)?.ok_or_abandon( 1190 &mut socket, 1191 MigratableError::MigrateSend(anyhow!("Error during dirty memory migration")), 1192 )?; 1193 1194 // Try at most 5 passes of dirty memory sending 1195 const MAX_DIRTY_MIGRATIONS: usize = 5; 1196 for i in 0..MAX_DIRTY_MIGRATIONS { 1197 info!("Dirty memory migration {} of {}", i, MAX_DIRTY_MIGRATIONS); 1198 if !Self::vm_maybe_send_dirty_pages(vm, &mut socket)? { 1199 break; 1200 } 1201 } 1202 1203 // Now pause VM 1204 vm.pause()?; 1205 1206 // Send last batch of dirty pages 1207 Self::vm_maybe_send_dirty_pages(vm, &mut socket)?; 1208 1209 // Stop logging dirty pages 1210 vm.stop_dirty_log()?; 1211 } 1212 // Capture snapshot and send it 1213 let vm_snapshot = vm.snapshot()?; 1214 let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap(); 1215 Request::state(snapshot_data.len() as u64).write_to(&mut socket)?; 1216 socket 1217 .write_all(&snapshot_data) 1218 .map_err(MigratableError::MigrateSocket)?; 1219 Response::read_from(&mut socket)?.ok_or_abandon( 1220 &mut socket, 1221 MigratableError::MigrateSend(anyhow!("Error during state migration")), 1222 )?; 1223 // Complete the migration 1224 Request::complete().write_to(&mut socket)?; 1225 Response::read_from(&mut socket)?.ok_or_abandon( 1226 &mut socket, 1227 MigratableError::MigrateSend(anyhow!("Error completing migration")), 1228 )?; 1229 1230 info!("Migration complete"); 1231 1232 // Let every Migratable object know about the migration being complete 1233 vm.complete_migration() 1234 } 1235 1236 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1237 fn vm_check_cpuid_compatibility( 1238 &self, 1239 src_vm_config: &Arc<Mutex<VmConfig>>, 1240 src_vm_cpuid: &[hypervisor::arch::x86::CpuIdEntry], 1241 ) -> result::Result<(), MigratableError> { 1242 #[cfg(feature = "tdx")] 1243 if src_vm_config.lock().unwrap().is_tdx_enabled() { 1244 return Err(MigratableError::MigrateReceive(anyhow!( 1245 "Live Migration is not supported when TDX is enabled" 1246 ))); 1247 }; 1248 1249 // We check the `CPUID` compatibility of between the source vm and destination, which is 1250 // mostly about feature compatibility and "topology/sgx" leaves are not relevant. 1251 let dest_cpuid = &{ 1252 let vm_config = &src_vm_config.lock().unwrap(); 1253 1254 let phys_bits = vm::physical_bits(&self.hypervisor, vm_config.cpus.max_phys_bits); 1255 arch::generate_common_cpuid( 1256 &self.hypervisor.clone(), 1257 &arch::CpuidConfig { 1258 sgx_epc_sections: None, 1259 phys_bits, 1260 kvm_hyperv: vm_config.cpus.kvm_hyperv, 1261 #[cfg(feature = "tdx")] 1262 tdx: false, 1263 amx: vm_config.cpus.features.amx, 1264 }, 1265 ) 1266 .map_err(|e| { 1267 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e)) 1268 })? 1269 }; 1270 arch::CpuidFeatureEntry::check_cpuid_compatibility(src_vm_cpuid, dest_cpuid).map_err(|e| { 1271 MigratableError::MigrateReceive(anyhow!( 1272 "Error checking cpu feature compatibility': {:?}", 1273 e 1274 )) 1275 }) 1276 } 1277 1278 fn control_loop( 1279 &mut self, 1280 api_receiver: Rc<Receiver<ApiRequest>>, 1281 #[cfg(feature = "guest_debug")] gdb_receiver: Rc<Receiver<gdb::GdbRequest>>, 1282 ) -> Result<()> { 1283 const EPOLL_EVENTS_LEN: usize = 100; 1284 1285 let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; 1286 let epoll_fd = self.epoll.as_raw_fd(); 1287 1288 'outer: loop { 1289 let num_events = match epoll::wait(epoll_fd, -1, &mut events[..]) { 1290 Ok(res) => res, 1291 Err(e) => { 1292 if e.kind() == io::ErrorKind::Interrupted { 1293 // It's well defined from the epoll_wait() syscall 1294 // documentation that the epoll loop can be interrupted 1295 // before any of the requested events occurred or the 1296 // timeout expired. In both those cases, epoll_wait() 1297 // returns an error of type EINTR, but this should not 1298 // be considered as a regular error. Instead it is more 1299 // appropriate to retry, by calling into epoll_wait(). 1300 continue; 1301 } 1302 return Err(Error::Epoll(e)); 1303 } 1304 }; 1305 1306 for event in events.iter().take(num_events) { 1307 let dispatch_event: EpollDispatch = event.data.into(); 1308 match dispatch_event { 1309 EpollDispatch::Unknown => { 1310 let event = event.data; 1311 warn!("Unknown VMM loop event: {}", event); 1312 } 1313 EpollDispatch::Exit => { 1314 info!("VM exit event"); 1315 // Consume the event. 1316 self.exit_evt.read().map_err(Error::EventFdRead)?; 1317 self.vmm_shutdown().map_err(Error::VmmShutdown)?; 1318 1319 break 'outer; 1320 } 1321 EpollDispatch::Reset => { 1322 info!("VM reset event"); 1323 // Consume the event. 1324 self.reset_evt.read().map_err(Error::EventFdRead)?; 1325 self.vm_reboot().map_err(Error::VmReboot)?; 1326 } 1327 EpollDispatch::ActivateVirtioDevices => { 1328 if let Some(ref vm) = self.vm { 1329 let count = self.activate_evt.read().map_err(Error::EventFdRead)?; 1330 info!( 1331 "Trying to activate pending virtio devices: count = {}", 1332 count 1333 ); 1334 vm.activate_virtio_devices() 1335 .map_err(Error::ActivateVirtioDevices)?; 1336 } 1337 } 1338 EpollDispatch::Api => { 1339 // Consume the events. 1340 for _ in 0..self.api_evt.read().map_err(Error::EventFdRead)? { 1341 // Read from the API receiver channel 1342 let api_request = api_receiver.recv().map_err(Error::ApiRequestRecv)?; 1343 1344 if api_request(self)? { 1345 break 'outer; 1346 } 1347 } 1348 } 1349 #[cfg(feature = "guest_debug")] 1350 EpollDispatch::Debug => { 1351 // Consume the events. 1352 for _ in 0..self.debug_evt.read().map_err(Error::EventFdRead)? { 1353 // Read from the API receiver channel 1354 let gdb_request = gdb_receiver.recv().map_err(Error::GdbRequestRecv)?; 1355 1356 let response = if let Some(ref mut vm) = self.vm { 1357 vm.debug_request(&gdb_request.payload, gdb_request.cpu_id) 1358 } else { 1359 Err(VmError::VmNotRunning) 1360 } 1361 .map_err(gdb::Error::Vm); 1362 1363 gdb_request 1364 .sender 1365 .send(response) 1366 .map_err(Error::GdbResponseSend)?; 1367 } 1368 } 1369 #[cfg(not(feature = "guest_debug"))] 1370 EpollDispatch::Debug => {} 1371 } 1372 } 1373 } 1374 1375 // Trigger the termination of the signal_handler thread 1376 if let Some(signals) = self.signals.take() { 1377 signals.close(); 1378 } 1379 1380 // Wait for all the threads to finish 1381 for thread in self.threads.drain(..) { 1382 thread.join().map_err(Error::ThreadCleanup)? 1383 } 1384 1385 Ok(()) 1386 } 1387 } 1388 1389 fn apply_landlock(vm_config: Arc<Mutex<VmConfig>>) -> result::Result<(), LandlockError> { 1390 vm_config.lock().unwrap().apply_landlock()?; 1391 Ok(()) 1392 } 1393 1394 impl RequestHandler for Vmm { 1395 fn vm_create(&mut self, config: Box<VmConfig>) -> result::Result<(), VmError> { 1396 // We only store the passed VM config. 1397 // The VM will be created when being asked to boot it. 1398 if self.vm_config.is_none() { 1399 self.vm_config = Some(Arc::new(Mutex::new(*config))); 1400 self.console_info = 1401 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1402 1403 if self 1404 .vm_config 1405 .as_ref() 1406 .unwrap() 1407 .lock() 1408 .unwrap() 1409 .landlock_enable 1410 { 1411 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1412 .map_err(VmError::ApplyLandlock)?; 1413 } 1414 Ok(()) 1415 } else { 1416 Err(VmError::VmAlreadyCreated) 1417 } 1418 } 1419 1420 fn vm_boot(&mut self) -> result::Result<(), VmError> { 1421 tracer::start(); 1422 info!("Booting VM"); 1423 event!("vm", "booting"); 1424 let r = { 1425 trace_scoped!("vm_boot"); 1426 // If we don't have a config, we cannot boot a VM. 1427 if self.vm_config.is_none() { 1428 return Err(VmError::VmMissingConfig); 1429 }; 1430 1431 // console_info is set to None in vm_shutdown. re-populate here if empty 1432 if self.console_info.is_none() { 1433 self.console_info = 1434 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1435 } 1436 1437 // Create a new VM if we don't have one yet. 1438 if self.vm.is_none() { 1439 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1440 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1441 #[cfg(feature = "guest_debug")] 1442 let vm_debug_evt = self 1443 .vm_debug_evt 1444 .try_clone() 1445 .map_err(VmError::EventFdClone)?; 1446 let activate_evt = self 1447 .activate_evt 1448 .try_clone() 1449 .map_err(VmError::EventFdClone)?; 1450 1451 if let Some(ref vm_config) = self.vm_config { 1452 let vm = Vm::new( 1453 Arc::clone(vm_config), 1454 exit_evt, 1455 reset_evt, 1456 #[cfg(feature = "guest_debug")] 1457 vm_debug_evt, 1458 &self.seccomp_action, 1459 self.hypervisor.clone(), 1460 activate_evt, 1461 self.console_info.clone(), 1462 self.console_resize_pipe.clone(), 1463 Arc::clone(&self.original_termios_opt), 1464 None, 1465 None, 1466 None, 1467 )?; 1468 1469 self.vm = Some(vm); 1470 } 1471 } 1472 1473 // Now we can boot the VM. 1474 if let Some(ref mut vm) = self.vm { 1475 vm.boot() 1476 } else { 1477 Err(VmError::VmNotCreated) 1478 } 1479 }; 1480 tracer::end(); 1481 if r.is_ok() { 1482 event!("vm", "booted"); 1483 } 1484 r 1485 } 1486 1487 fn vm_pause(&mut self) -> result::Result<(), VmError> { 1488 if let Some(ref mut vm) = self.vm { 1489 vm.pause().map_err(VmError::Pause) 1490 } else { 1491 Err(VmError::VmNotRunning) 1492 } 1493 } 1494 1495 fn vm_resume(&mut self) -> result::Result<(), VmError> { 1496 if let Some(ref mut vm) = self.vm { 1497 vm.resume().map_err(VmError::Resume) 1498 } else { 1499 Err(VmError::VmNotRunning) 1500 } 1501 } 1502 1503 fn vm_snapshot(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1504 if let Some(ref mut vm) = self.vm { 1505 // Drain console_info so that FDs are not reused 1506 let _ = self.console_info.take(); 1507 vm.snapshot() 1508 .map_err(VmError::Snapshot) 1509 .and_then(|snapshot| { 1510 vm.send(&snapshot, destination_url) 1511 .map_err(VmError::SnapshotSend) 1512 }) 1513 } else { 1514 Err(VmError::VmNotRunning) 1515 } 1516 } 1517 1518 fn vm_restore(&mut self, restore_cfg: RestoreConfig) -> result::Result<(), VmError> { 1519 if self.vm.is_some() || self.vm_config.is_some() { 1520 return Err(VmError::VmAlreadyCreated); 1521 } 1522 1523 let source_url = restore_cfg.source_url.as_path().to_str(); 1524 if source_url.is_none() { 1525 return Err(VmError::InvalidRestoreSourceUrl); 1526 } 1527 // Safe to unwrap as we checked it was Some(&str). 1528 let source_url = source_url.unwrap(); 1529 1530 let vm_config = Arc::new(Mutex::new( 1531 recv_vm_config(source_url).map_err(VmError::Restore)?, 1532 )); 1533 restore_cfg 1534 .validate(&vm_config.lock().unwrap().clone()) 1535 .map_err(VmError::ConfigValidation)?; 1536 1537 // Update VM's net configurations with new fds received for restore operation 1538 if let (Some(restored_nets), Some(vm_net_configs)) = 1539 (restore_cfg.net_fds, &mut vm_config.lock().unwrap().net) 1540 { 1541 for net in restored_nets.iter() { 1542 for net_config in vm_net_configs.iter_mut() { 1543 // update only if the net dev is backed by FDs 1544 if net_config.id == Some(net.id.clone()) && net_config.fds.is_some() { 1545 net_config.fds.clone_from(&net.fds); 1546 } 1547 } 1548 } 1549 } 1550 1551 let snapshot = recv_vm_state(source_url).map_err(VmError::Restore)?; 1552 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1553 let vm_snapshot = get_vm_snapshot(&snapshot).map_err(VmError::Restore)?; 1554 1555 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1556 self.vm_check_cpuid_compatibility(&vm_config, &vm_snapshot.common_cpuid) 1557 .map_err(VmError::Restore)?; 1558 1559 self.vm_config = Some(Arc::clone(&vm_config)); 1560 1561 // console_info is set to None in vm_snapshot. re-populate here if empty 1562 if self.console_info.is_none() { 1563 self.console_info = 1564 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1565 } 1566 1567 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1568 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1569 #[cfg(feature = "guest_debug")] 1570 let debug_evt = self 1571 .vm_debug_evt 1572 .try_clone() 1573 .map_err(VmError::EventFdClone)?; 1574 let activate_evt = self 1575 .activate_evt 1576 .try_clone() 1577 .map_err(VmError::EventFdClone)?; 1578 1579 let vm = Vm::new( 1580 vm_config, 1581 exit_evt, 1582 reset_evt, 1583 #[cfg(feature = "guest_debug")] 1584 debug_evt, 1585 &self.seccomp_action, 1586 self.hypervisor.clone(), 1587 activate_evt, 1588 self.console_info.clone(), 1589 self.console_resize_pipe.clone(), 1590 Arc::clone(&self.original_termios_opt), 1591 Some(snapshot), 1592 Some(source_url), 1593 Some(restore_cfg.prefault), 1594 )?; 1595 self.vm = Some(vm); 1596 1597 if self 1598 .vm_config 1599 .as_ref() 1600 .unwrap() 1601 .lock() 1602 .unwrap() 1603 .landlock_enable 1604 { 1605 apply_landlock(self.vm_config.as_ref().unwrap().clone()) 1606 .map_err(VmError::ApplyLandlock)?; 1607 } 1608 1609 // Now we can restore the rest of the VM. 1610 if let Some(ref mut vm) = self.vm { 1611 vm.restore() 1612 } else { 1613 Err(VmError::VmNotCreated) 1614 } 1615 } 1616 1617 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 1618 fn vm_coredump(&mut self, destination_url: &str) -> result::Result<(), VmError> { 1619 if let Some(ref mut vm) = self.vm { 1620 vm.coredump(destination_url).map_err(VmError::Coredump) 1621 } else { 1622 Err(VmError::VmNotRunning) 1623 } 1624 } 1625 1626 fn vm_shutdown(&mut self) -> result::Result<(), VmError> { 1627 let r = if let Some(ref mut vm) = self.vm.take() { 1628 // Drain console_info so that the FDs are not reused 1629 let _ = self.console_info.take(); 1630 vm.shutdown() 1631 } else { 1632 Err(VmError::VmNotRunning) 1633 }; 1634 1635 if r.is_ok() { 1636 event!("vm", "shutdown"); 1637 } 1638 1639 r 1640 } 1641 1642 fn vm_reboot(&mut self) -> result::Result<(), VmError> { 1643 event!("vm", "rebooting"); 1644 1645 // First we stop the current VM 1646 let config = if let Some(mut vm) = self.vm.take() { 1647 let config = vm.get_config(); 1648 vm.shutdown()?; 1649 config 1650 } else { 1651 return Err(VmError::VmNotCreated); 1652 }; 1653 1654 // vm.shutdown() closes all the console devices, so set console_info to None 1655 // so that the closed FD #s are not reused. 1656 let _ = self.console_info.take(); 1657 1658 let exit_evt = self.exit_evt.try_clone().map_err(VmError::EventFdClone)?; 1659 let reset_evt = self.reset_evt.try_clone().map_err(VmError::EventFdClone)?; 1660 #[cfg(feature = "guest_debug")] 1661 let debug_evt = self 1662 .vm_debug_evt 1663 .try_clone() 1664 .map_err(VmError::EventFdClone)?; 1665 let activate_evt = self 1666 .activate_evt 1667 .try_clone() 1668 .map_err(VmError::EventFdClone)?; 1669 1670 // The Linux kernel fires off an i8042 reset after doing the ACPI reset so there may be 1671 // an event sitting in the shared reset_evt. Without doing this we get very early reboots 1672 // during the boot process. 1673 if self.reset_evt.read().is_ok() { 1674 warn!("Spurious second reset event received. Ignoring."); 1675 } 1676 1677 self.console_info = 1678 Some(pre_create_console_devices(self).map_err(VmError::CreateConsoleDevices)?); 1679 1680 // Then we create the new VM 1681 let mut vm = Vm::new( 1682 config, 1683 exit_evt, 1684 reset_evt, 1685 #[cfg(feature = "guest_debug")] 1686 debug_evt, 1687 &self.seccomp_action, 1688 self.hypervisor.clone(), 1689 activate_evt, 1690 self.console_info.clone(), 1691 self.console_resize_pipe.clone(), 1692 Arc::clone(&self.original_termios_opt), 1693 None, 1694 None, 1695 None, 1696 )?; 1697 1698 // And we boot it 1699 vm.boot()?; 1700 1701 self.vm = Some(vm); 1702 1703 event!("vm", "rebooted"); 1704 1705 Ok(()) 1706 } 1707 1708 fn vm_info(&self) -> result::Result<VmInfoResponse, VmError> { 1709 match &self.vm_config { 1710 Some(vm_config) => { 1711 let state = match &self.vm { 1712 Some(vm) => vm.get_state()?, 1713 None => VmState::Created, 1714 }; 1715 let config = vm_config.lock().unwrap().clone(); 1716 1717 let mut memory_actual_size = config.memory.total_size(); 1718 if let Some(vm) = &self.vm { 1719 memory_actual_size -= vm.balloon_size(); 1720 } 1721 1722 let device_tree = self 1723 .vm 1724 .as_ref() 1725 .map(|vm| vm.device_tree().lock().unwrap().clone()); 1726 1727 Ok(VmInfoResponse { 1728 config: Box::new(config), 1729 state, 1730 memory_actual_size, 1731 device_tree, 1732 }) 1733 } 1734 None => Err(VmError::VmNotCreated), 1735 } 1736 } 1737 1738 fn vmm_ping(&self) -> VmmPingResponse { 1739 let VmmVersionInfo { 1740 build_version, 1741 version, 1742 } = self.version.clone(); 1743 1744 VmmPingResponse { 1745 build_version, 1746 version, 1747 pid: std::process::id() as i64, 1748 features: feature_list(), 1749 } 1750 } 1751 1752 fn vm_delete(&mut self) -> result::Result<(), VmError> { 1753 if self.vm_config.is_none() { 1754 return Ok(()); 1755 } 1756 1757 // If a VM is booted, we first try to shut it down. 1758 if self.vm.is_some() { 1759 self.vm_shutdown()?; 1760 } 1761 1762 self.vm_config = None; 1763 1764 event!("vm", "deleted"); 1765 1766 Ok(()) 1767 } 1768 1769 fn vmm_shutdown(&mut self) -> result::Result<(), VmError> { 1770 self.vm_delete()?; 1771 event!("vmm", "shutdown"); 1772 Ok(()) 1773 } 1774 1775 fn vm_resize( 1776 &mut self, 1777 desired_vcpus: Option<u8>, 1778 desired_ram: Option<u64>, 1779 desired_balloon: Option<u64>, 1780 ) -> result::Result<(), VmError> { 1781 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1782 1783 if let Some(ref mut vm) = self.vm { 1784 if let Err(e) = vm.resize(desired_vcpus, desired_ram, desired_balloon) { 1785 error!("Error when resizing VM: {:?}", e); 1786 Err(e) 1787 } else { 1788 Ok(()) 1789 } 1790 } else { 1791 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1792 if let Some(desired_vcpus) = desired_vcpus { 1793 config.cpus.boot_vcpus = desired_vcpus; 1794 } 1795 if let Some(desired_ram) = desired_ram { 1796 config.memory.size = desired_ram; 1797 } 1798 if let Some(desired_balloon) = desired_balloon { 1799 if let Some(balloon_config) = &mut config.balloon { 1800 balloon_config.size = desired_balloon; 1801 } 1802 } 1803 Ok(()) 1804 } 1805 } 1806 1807 fn vm_resize_zone(&mut self, id: String, desired_ram: u64) -> result::Result<(), VmError> { 1808 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1809 1810 if let Some(ref mut vm) = self.vm { 1811 if let Err(e) = vm.resize_zone(id, desired_ram) { 1812 error!("Error when resizing VM: {:?}", e); 1813 Err(e) 1814 } else { 1815 Ok(()) 1816 } 1817 } else { 1818 // Update VmConfig by setting the new desired ram. 1819 let memory_config = &mut self.vm_config.as_ref().unwrap().lock().unwrap().memory; 1820 1821 if let Some(zones) = &mut memory_config.zones { 1822 for zone in zones.iter_mut() { 1823 if zone.id == id { 1824 zone.size = desired_ram; 1825 return Ok(()); 1826 } 1827 } 1828 } 1829 1830 error!("Could not find the memory zone {} for the resize", id); 1831 Err(VmError::ResizeZone) 1832 } 1833 } 1834 1835 fn vm_add_device( 1836 &mut self, 1837 device_cfg: DeviceConfig, 1838 ) -> result::Result<Option<Vec<u8>>, VmError> { 1839 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1840 1841 { 1842 // Validate the configuration change in a cloned configuration 1843 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1844 add_to_config(&mut config.devices, device_cfg.clone()); 1845 config.validate().map_err(VmError::ConfigValidation)?; 1846 } 1847 1848 if let Some(ref mut vm) = self.vm { 1849 let info = vm.add_device(device_cfg).map_err(|e| { 1850 error!("Error when adding new device to the VM: {:?}", e); 1851 e 1852 })?; 1853 serde_json::to_vec(&info) 1854 .map(Some) 1855 .map_err(VmError::SerializeJson) 1856 } else { 1857 // Update VmConfig by adding the new device. 1858 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1859 add_to_config(&mut config.devices, device_cfg); 1860 Ok(None) 1861 } 1862 } 1863 1864 fn vm_add_user_device( 1865 &mut self, 1866 device_cfg: UserDeviceConfig, 1867 ) -> result::Result<Option<Vec<u8>>, VmError> { 1868 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1869 1870 { 1871 // Validate the configuration change in a cloned configuration 1872 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1873 add_to_config(&mut config.user_devices, device_cfg.clone()); 1874 config.validate().map_err(VmError::ConfigValidation)?; 1875 } 1876 1877 if let Some(ref mut vm) = self.vm { 1878 let info = vm.add_user_device(device_cfg).map_err(|e| { 1879 error!("Error when adding new user device to the VM: {:?}", e); 1880 e 1881 })?; 1882 serde_json::to_vec(&info) 1883 .map(Some) 1884 .map_err(VmError::SerializeJson) 1885 } else { 1886 // Update VmConfig by adding the new device. 1887 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1888 add_to_config(&mut config.user_devices, device_cfg); 1889 Ok(None) 1890 } 1891 } 1892 1893 fn vm_remove_device(&mut self, id: String) -> result::Result<(), VmError> { 1894 if let Some(ref mut vm) = self.vm { 1895 if let Err(e) = vm.remove_device(id) { 1896 error!("Error when removing device from the VM: {:?}", e); 1897 Err(e) 1898 } else { 1899 Ok(()) 1900 } 1901 } else if let Some(ref config) = self.vm_config { 1902 let mut config = config.lock().unwrap(); 1903 if config.remove_device(&id) { 1904 Ok(()) 1905 } else { 1906 Err(VmError::NoDeviceToRemove(id)) 1907 } 1908 } else { 1909 Err(VmError::VmNotCreated) 1910 } 1911 } 1912 1913 fn vm_add_disk(&mut self, disk_cfg: DiskConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1914 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1915 1916 { 1917 // Validate the configuration change in a cloned configuration 1918 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1919 add_to_config(&mut config.disks, disk_cfg.clone()); 1920 config.validate().map_err(VmError::ConfigValidation)?; 1921 } 1922 1923 if let Some(ref mut vm) = self.vm { 1924 let info = vm.add_disk(disk_cfg).map_err(|e| { 1925 error!("Error when adding new disk to the VM: {:?}", e); 1926 e 1927 })?; 1928 serde_json::to_vec(&info) 1929 .map(Some) 1930 .map_err(VmError::SerializeJson) 1931 } else { 1932 // Update VmConfig by adding the new device. 1933 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1934 add_to_config(&mut config.disks, disk_cfg); 1935 Ok(None) 1936 } 1937 } 1938 1939 fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1940 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1941 1942 { 1943 // Validate the configuration change in a cloned configuration 1944 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1945 add_to_config(&mut config.fs, fs_cfg.clone()); 1946 config.validate().map_err(VmError::ConfigValidation)?; 1947 } 1948 1949 if let Some(ref mut vm) = self.vm { 1950 let info = vm.add_fs(fs_cfg).map_err(|e| { 1951 error!("Error when adding new fs to the VM: {:?}", e); 1952 e 1953 })?; 1954 serde_json::to_vec(&info) 1955 .map(Some) 1956 .map_err(VmError::SerializeJson) 1957 } else { 1958 // Update VmConfig by adding the new device. 1959 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1960 add_to_config(&mut config.fs, fs_cfg); 1961 Ok(None) 1962 } 1963 } 1964 1965 fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1966 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1967 1968 { 1969 // Validate the configuration change in a cloned configuration 1970 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1971 add_to_config(&mut config.pmem, pmem_cfg.clone()); 1972 config.validate().map_err(VmError::ConfigValidation)?; 1973 } 1974 1975 if let Some(ref mut vm) = self.vm { 1976 let info = vm.add_pmem(pmem_cfg).map_err(|e| { 1977 error!("Error when adding new pmem device to the VM: {:?}", e); 1978 e 1979 })?; 1980 serde_json::to_vec(&info) 1981 .map(Some) 1982 .map_err(VmError::SerializeJson) 1983 } else { 1984 // Update VmConfig by adding the new device. 1985 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 1986 add_to_config(&mut config.pmem, pmem_cfg); 1987 Ok(None) 1988 } 1989 } 1990 1991 fn vm_add_net(&mut self, net_cfg: NetConfig) -> result::Result<Option<Vec<u8>>, VmError> { 1992 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 1993 1994 { 1995 // Validate the configuration change in a cloned configuration 1996 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 1997 add_to_config(&mut config.net, net_cfg.clone()); 1998 config.validate().map_err(VmError::ConfigValidation)?; 1999 } 2000 2001 if let Some(ref mut vm) = self.vm { 2002 let info = vm.add_net(net_cfg).map_err(|e| { 2003 error!("Error when adding new network device to the VM: {:?}", e); 2004 e 2005 })?; 2006 serde_json::to_vec(&info) 2007 .map(Some) 2008 .map_err(VmError::SerializeJson) 2009 } else { 2010 // Update VmConfig by adding the new device. 2011 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2012 add_to_config(&mut config.net, net_cfg); 2013 Ok(None) 2014 } 2015 } 2016 2017 fn vm_add_vdpa(&mut self, vdpa_cfg: VdpaConfig) -> result::Result<Option<Vec<u8>>, VmError> { 2018 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 2019 2020 { 2021 // Validate the configuration change in a cloned configuration 2022 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 2023 add_to_config(&mut config.vdpa, vdpa_cfg.clone()); 2024 config.validate().map_err(VmError::ConfigValidation)?; 2025 } 2026 2027 if let Some(ref mut vm) = self.vm { 2028 let info = vm.add_vdpa(vdpa_cfg).map_err(|e| { 2029 error!("Error when adding new vDPA device to the VM: {:?}", e); 2030 e 2031 })?; 2032 serde_json::to_vec(&info) 2033 .map(Some) 2034 .map_err(VmError::SerializeJson) 2035 } else { 2036 // Update VmConfig by adding the new device. 2037 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2038 add_to_config(&mut config.vdpa, vdpa_cfg); 2039 Ok(None) 2040 } 2041 } 2042 2043 fn vm_add_vsock(&mut self, vsock_cfg: VsockConfig) -> result::Result<Option<Vec<u8>>, VmError> { 2044 self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; 2045 2046 { 2047 // Validate the configuration change in a cloned configuration 2048 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); 2049 2050 if config.vsock.is_some() { 2051 return Err(VmError::TooManyVsockDevices); 2052 } 2053 2054 config.vsock = Some(vsock_cfg.clone()); 2055 config.validate().map_err(VmError::ConfigValidation)?; 2056 } 2057 2058 if let Some(ref mut vm) = self.vm { 2059 let info = vm.add_vsock(vsock_cfg).map_err(|e| { 2060 error!("Error when adding new vsock device to the VM: {:?}", e); 2061 e 2062 })?; 2063 serde_json::to_vec(&info) 2064 .map(Some) 2065 .map_err(VmError::SerializeJson) 2066 } else { 2067 // Update VmConfig by adding the new device. 2068 let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); 2069 config.vsock = Some(vsock_cfg); 2070 Ok(None) 2071 } 2072 } 2073 2074 fn vm_counters(&mut self) -> result::Result<Option<Vec<u8>>, VmError> { 2075 if let Some(ref mut vm) = self.vm { 2076 let info = vm.counters().map_err(|e| { 2077 error!("Error when getting counters from the VM: {:?}", e); 2078 e 2079 })?; 2080 serde_json::to_vec(&info) 2081 .map(Some) 2082 .map_err(VmError::SerializeJson) 2083 } else { 2084 Err(VmError::VmNotRunning) 2085 } 2086 } 2087 2088 fn vm_power_button(&mut self) -> result::Result<(), VmError> { 2089 if let Some(ref mut vm) = self.vm { 2090 vm.power_button() 2091 } else { 2092 Err(VmError::VmNotRunning) 2093 } 2094 } 2095 2096 fn vm_nmi(&mut self) -> result::Result<(), VmError> { 2097 if let Some(ref mut vm) = self.vm { 2098 vm.nmi() 2099 } else { 2100 Err(VmError::VmNotRunning) 2101 } 2102 } 2103 2104 fn vm_receive_migration( 2105 &mut self, 2106 receive_data_migration: VmReceiveMigrationData, 2107 ) -> result::Result<(), MigratableError> { 2108 info!( 2109 "Receiving migration: receiver_url = {}", 2110 receive_data_migration.receiver_url 2111 ); 2112 2113 // Accept the connection and get the socket 2114 let mut socket = Vmm::receive_migration_socket(&receive_data_migration.receiver_url)?; 2115 2116 let mut started = false; 2117 let mut memory_manager: Option<Arc<Mutex<MemoryManager>>> = None; 2118 let mut existing_memory_files = None; 2119 loop { 2120 let req = Request::read_from(&mut socket)?; 2121 match req.command() { 2122 Command::Invalid => info!("Invalid Command Received"), 2123 Command::Start => { 2124 info!("Start Command Received"); 2125 started = true; 2126 2127 Response::ok().write_to(&mut socket)?; 2128 } 2129 Command::Config => { 2130 info!("Config Command Received"); 2131 2132 if !started { 2133 warn!("Migration not started yet"); 2134 Response::error().write_to(&mut socket)?; 2135 continue; 2136 } 2137 memory_manager = Some(self.vm_receive_config( 2138 &req, 2139 &mut socket, 2140 existing_memory_files.take(), 2141 )?); 2142 } 2143 Command::State => { 2144 info!("State Command Received"); 2145 2146 if !started { 2147 warn!("Migration not started yet"); 2148 Response::error().write_to(&mut socket)?; 2149 continue; 2150 } 2151 if let Some(mm) = memory_manager.take() { 2152 self.vm_receive_state(&req, &mut socket, mm)?; 2153 } else { 2154 warn!("Configuration not sent yet"); 2155 Response::error().write_to(&mut socket)?; 2156 } 2157 } 2158 Command::Memory => { 2159 info!("Memory Command Received"); 2160 2161 if !started { 2162 warn!("Migration not started yet"); 2163 Response::error().write_to(&mut socket)?; 2164 continue; 2165 } 2166 if let Some(mm) = memory_manager.as_ref() { 2167 self.vm_receive_memory(&req, &mut socket, &mut mm.lock().unwrap())?; 2168 } else { 2169 warn!("Configuration not sent yet"); 2170 Response::error().write_to(&mut socket)?; 2171 } 2172 } 2173 Command::MemoryFd => { 2174 info!("MemoryFd Command Received"); 2175 2176 if !started { 2177 warn!("Migration not started yet"); 2178 Response::error().write_to(&mut socket)?; 2179 continue; 2180 } 2181 2182 match &mut socket { 2183 SocketStream::Unix(unix_socket) => { 2184 let mut buf = [0u8; 4]; 2185 let (_, file) = unix_socket.recv_with_fd(&mut buf).map_err(|e| { 2186 MigratableError::MigrateReceive(anyhow!( 2187 "Error receiving slot from socket: {}", 2188 e 2189 )) 2190 })?; 2191 2192 if existing_memory_files.is_none() { 2193 existing_memory_files = Some(HashMap::default()) 2194 } 2195 2196 if let Some(ref mut existing_memory_files) = existing_memory_files { 2197 let slot = u32::from_le_bytes(buf); 2198 existing_memory_files.insert(slot, file.unwrap()); 2199 } 2200 2201 Response::ok().write_to(&mut socket)?; 2202 } 2203 SocketStream::Tcp(_tcp_socket) => { 2204 // For TCP sockets, we cannot transfer file descriptors 2205 warn!( 2206 "MemoryFd command received over TCP socket, which is not supported" 2207 ); 2208 Response::error().write_to(&mut socket)?; 2209 } 2210 } 2211 } 2212 Command::Complete => { 2213 info!("Complete Command Received"); 2214 if let Some(ref mut vm) = self.vm.as_mut() { 2215 vm.resume()?; 2216 Response::ok().write_to(&mut socket)?; 2217 } else { 2218 warn!("VM not created yet"); 2219 Response::error().write_to(&mut socket)?; 2220 } 2221 break; 2222 } 2223 Command::Abandon => { 2224 info!("Abandon Command Received"); 2225 self.vm = None; 2226 self.vm_config = None; 2227 Response::ok().write_to(&mut socket).ok(); 2228 break; 2229 } 2230 } 2231 } 2232 2233 Ok(()) 2234 } 2235 2236 fn vm_send_migration( 2237 &mut self, 2238 send_data_migration: VmSendMigrationData, 2239 ) -> result::Result<(), MigratableError> { 2240 info!( 2241 "Sending migration: destination_url = {}, local = {}", 2242 send_data_migration.destination_url, send_data_migration.local 2243 ); 2244 2245 if !self 2246 .vm_config 2247 .as_ref() 2248 .unwrap() 2249 .lock() 2250 .unwrap() 2251 .backed_by_shared_memory() 2252 && send_data_migration.local 2253 { 2254 return Err(MigratableError::MigrateSend(anyhow!( 2255 "Local migration requires shared memory or hugepages enabled" 2256 ))); 2257 } 2258 2259 if let Some(vm) = self.vm.as_mut() { 2260 Self::send_migration( 2261 vm, 2262 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2263 self.hypervisor.clone(), 2264 send_data_migration, 2265 ) 2266 .map_err(|migration_err| { 2267 error!("Migration failed: {:?}", migration_err); 2268 2269 // Stop logging dirty pages 2270 if let Err(e) = vm.stop_dirty_log() { 2271 return e; 2272 } 2273 2274 if vm.get_state().unwrap() == VmState::Paused { 2275 if let Err(e) = vm.resume() { 2276 return e; 2277 } 2278 } 2279 2280 migration_err 2281 })?; 2282 2283 // Shutdown the VM after the migration succeeded 2284 self.exit_evt.write(1).map_err(|e| { 2285 MigratableError::MigrateSend(anyhow!( 2286 "Failed shutting down the VM after migration: {:?}", 2287 e 2288 )) 2289 }) 2290 } else { 2291 Err(MigratableError::MigrateSend(anyhow!("VM is not running"))) 2292 } 2293 } 2294 } 2295 2296 const CPU_MANAGER_SNAPSHOT_ID: &str = "cpu-manager"; 2297 const MEMORY_MANAGER_SNAPSHOT_ID: &str = "memory-manager"; 2298 const DEVICE_MANAGER_SNAPSHOT_ID: &str = "device-manager"; 2299 2300 #[cfg(test)] 2301 mod unit_tests { 2302 use super::*; 2303 #[cfg(target_arch = "x86_64")] 2304 use crate::vm_config::DebugConsoleConfig; 2305 use crate::vm_config::{ 2306 ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpusConfig, HotplugMethod, MemoryConfig, 2307 PayloadConfig, RngConfig, 2308 }; 2309 2310 fn create_dummy_vmm() -> Vmm { 2311 Vmm::new( 2312 VmmVersionInfo::new("dummy", "dummy"), 2313 EventFd::new(EFD_NONBLOCK).unwrap(), 2314 #[cfg(feature = "guest_debug")] 2315 EventFd::new(EFD_NONBLOCK).unwrap(), 2316 #[cfg(feature = "guest_debug")] 2317 EventFd::new(EFD_NONBLOCK).unwrap(), 2318 SeccompAction::Allow, 2319 hypervisor::new().unwrap(), 2320 EventFd::new(EFD_NONBLOCK).unwrap(), 2321 ) 2322 .unwrap() 2323 } 2324 2325 fn create_dummy_vm_config() -> Box<VmConfig> { 2326 Box::new(VmConfig { 2327 cpus: CpusConfig { 2328 boot_vcpus: 1, 2329 max_vcpus: 1, 2330 topology: None, 2331 kvm_hyperv: false, 2332 max_phys_bits: 46, 2333 affinity: None, 2334 features: CpuFeatures::default(), 2335 }, 2336 memory: MemoryConfig { 2337 size: 536_870_912, 2338 mergeable: false, 2339 hotplug_method: HotplugMethod::Acpi, 2340 hotplug_size: None, 2341 hotplugged_size: None, 2342 shared: true, 2343 hugepages: false, 2344 hugepage_size: None, 2345 prefault: false, 2346 zones: None, 2347 thp: true, 2348 }, 2349 payload: Some(PayloadConfig { 2350 kernel: Some(PathBuf::from("/path/to/kernel")), 2351 firmware: None, 2352 cmdline: None, 2353 initramfs: None, 2354 #[cfg(feature = "igvm")] 2355 igvm: None, 2356 #[cfg(feature = "sev_snp")] 2357 host_data: None, 2358 }), 2359 rate_limit_groups: None, 2360 disks: None, 2361 net: None, 2362 rng: RngConfig { 2363 src: PathBuf::from("/dev/urandom"), 2364 iommu: false, 2365 }, 2366 balloon: None, 2367 fs: None, 2368 pmem: None, 2369 serial: ConsoleConfig { 2370 file: None, 2371 mode: ConsoleOutputMode::Null, 2372 iommu: false, 2373 socket: None, 2374 }, 2375 console: ConsoleConfig { 2376 file: None, 2377 mode: ConsoleOutputMode::Tty, 2378 iommu: false, 2379 socket: None, 2380 }, 2381 #[cfg(target_arch = "x86_64")] 2382 debug_console: DebugConsoleConfig::default(), 2383 devices: None, 2384 user_devices: None, 2385 vdpa: None, 2386 vsock: None, 2387 #[cfg(feature = "pvmemcontrol")] 2388 pvmemcontrol: None, 2389 pvpanic: false, 2390 iommu: false, 2391 #[cfg(target_arch = "x86_64")] 2392 sgx_epc: None, 2393 numa: None, 2394 watchdog: false, 2395 #[cfg(feature = "guest_debug")] 2396 gdb: false, 2397 pci_segments: None, 2398 platform: None, 2399 tpm: None, 2400 preserved_fds: None, 2401 landlock_enable: false, 2402 landlock_rules: None, 2403 }) 2404 } 2405 2406 #[test] 2407 fn test_vmm_vm_create() { 2408 let mut vmm = create_dummy_vmm(); 2409 let config = create_dummy_vm_config(); 2410 2411 assert!(matches!(vmm.vm_create(config.clone()), Ok(()))); 2412 assert!(matches!( 2413 vmm.vm_create(config), 2414 Err(VmError::VmAlreadyCreated) 2415 )); 2416 } 2417 2418 #[test] 2419 fn test_vmm_vm_cold_add_device() { 2420 let mut vmm = create_dummy_vmm(); 2421 let device_config = DeviceConfig::parse("path=/path/to/device").unwrap(); 2422 2423 assert!(matches!( 2424 vmm.vm_add_device(device_config.clone()), 2425 Err(VmError::VmNotCreated) 2426 )); 2427 2428 let _ = vmm.vm_create(create_dummy_vm_config()); 2429 assert!(vmm 2430 .vm_config 2431 .as_ref() 2432 .unwrap() 2433 .lock() 2434 .unwrap() 2435 .devices 2436 .is_none()); 2437 2438 assert!(vmm.vm_add_device(device_config.clone()).unwrap().is_none()); 2439 assert_eq!( 2440 vmm.vm_config 2441 .as_ref() 2442 .unwrap() 2443 .lock() 2444 .unwrap() 2445 .devices 2446 .clone() 2447 .unwrap() 2448 .len(), 2449 1 2450 ); 2451 assert_eq!( 2452 vmm.vm_config 2453 .as_ref() 2454 .unwrap() 2455 .lock() 2456 .unwrap() 2457 .devices 2458 .clone() 2459 .unwrap()[0], 2460 device_config 2461 ); 2462 } 2463 2464 #[test] 2465 fn test_vmm_vm_cold_add_user_device() { 2466 let mut vmm = create_dummy_vmm(); 2467 let user_device_config = 2468 UserDeviceConfig::parse("socket=/path/to/socket,id=8,pci_segment=2").unwrap(); 2469 2470 assert!(matches!( 2471 vmm.vm_add_user_device(user_device_config.clone()), 2472 Err(VmError::VmNotCreated) 2473 )); 2474 2475 let _ = vmm.vm_create(create_dummy_vm_config()); 2476 assert!(vmm 2477 .vm_config 2478 .as_ref() 2479 .unwrap() 2480 .lock() 2481 .unwrap() 2482 .user_devices 2483 .is_none()); 2484 2485 assert!(vmm 2486 .vm_add_user_device(user_device_config.clone()) 2487 .unwrap() 2488 .is_none()); 2489 assert_eq!( 2490 vmm.vm_config 2491 .as_ref() 2492 .unwrap() 2493 .lock() 2494 .unwrap() 2495 .user_devices 2496 .clone() 2497 .unwrap() 2498 .len(), 2499 1 2500 ); 2501 assert_eq!( 2502 vmm.vm_config 2503 .as_ref() 2504 .unwrap() 2505 .lock() 2506 .unwrap() 2507 .user_devices 2508 .clone() 2509 .unwrap()[0], 2510 user_device_config 2511 ); 2512 } 2513 2514 #[test] 2515 fn test_vmm_vm_cold_add_disk() { 2516 let mut vmm = create_dummy_vmm(); 2517 let disk_config = DiskConfig::parse("path=/path/to_file").unwrap(); 2518 2519 assert!(matches!( 2520 vmm.vm_add_disk(disk_config.clone()), 2521 Err(VmError::VmNotCreated) 2522 )); 2523 2524 let _ = vmm.vm_create(create_dummy_vm_config()); 2525 assert!(vmm 2526 .vm_config 2527 .as_ref() 2528 .unwrap() 2529 .lock() 2530 .unwrap() 2531 .disks 2532 .is_none()); 2533 2534 assert!(vmm.vm_add_disk(disk_config.clone()).unwrap().is_none()); 2535 assert_eq!( 2536 vmm.vm_config 2537 .as_ref() 2538 .unwrap() 2539 .lock() 2540 .unwrap() 2541 .disks 2542 .clone() 2543 .unwrap() 2544 .len(), 2545 1 2546 ); 2547 assert_eq!( 2548 vmm.vm_config 2549 .as_ref() 2550 .unwrap() 2551 .lock() 2552 .unwrap() 2553 .disks 2554 .clone() 2555 .unwrap()[0], 2556 disk_config 2557 ); 2558 } 2559 2560 #[test] 2561 fn test_vmm_vm_cold_add_fs() { 2562 let mut vmm = create_dummy_vmm(); 2563 let fs_config = FsConfig::parse("tag=mytag,socket=/tmp/sock").unwrap(); 2564 2565 assert!(matches!( 2566 vmm.vm_add_fs(fs_config.clone()), 2567 Err(VmError::VmNotCreated) 2568 )); 2569 2570 let _ = vmm.vm_create(create_dummy_vm_config()); 2571 assert!(vmm.vm_config.as_ref().unwrap().lock().unwrap().fs.is_none()); 2572 2573 assert!(vmm.vm_add_fs(fs_config.clone()).unwrap().is_none()); 2574 assert_eq!( 2575 vmm.vm_config 2576 .as_ref() 2577 .unwrap() 2578 .lock() 2579 .unwrap() 2580 .fs 2581 .clone() 2582 .unwrap() 2583 .len(), 2584 1 2585 ); 2586 assert_eq!( 2587 vmm.vm_config 2588 .as_ref() 2589 .unwrap() 2590 .lock() 2591 .unwrap() 2592 .fs 2593 .clone() 2594 .unwrap()[0], 2595 fs_config 2596 ); 2597 } 2598 2599 #[test] 2600 fn test_vmm_vm_cold_add_pmem() { 2601 let mut vmm = create_dummy_vmm(); 2602 let pmem_config = PmemConfig::parse("file=/tmp/pmem,size=128M").unwrap(); 2603 2604 assert!(matches!( 2605 vmm.vm_add_pmem(pmem_config.clone()), 2606 Err(VmError::VmNotCreated) 2607 )); 2608 2609 let _ = vmm.vm_create(create_dummy_vm_config()); 2610 assert!(vmm 2611 .vm_config 2612 .as_ref() 2613 .unwrap() 2614 .lock() 2615 .unwrap() 2616 .pmem 2617 .is_none()); 2618 2619 assert!(vmm.vm_add_pmem(pmem_config.clone()).unwrap().is_none()); 2620 assert_eq!( 2621 vmm.vm_config 2622 .as_ref() 2623 .unwrap() 2624 .lock() 2625 .unwrap() 2626 .pmem 2627 .clone() 2628 .unwrap() 2629 .len(), 2630 1 2631 ); 2632 assert_eq!( 2633 vmm.vm_config 2634 .as_ref() 2635 .unwrap() 2636 .lock() 2637 .unwrap() 2638 .pmem 2639 .clone() 2640 .unwrap()[0], 2641 pmem_config 2642 ); 2643 } 2644 2645 #[test] 2646 fn test_vmm_vm_cold_add_net() { 2647 let mut vmm = create_dummy_vmm(); 2648 let net_config = NetConfig::parse( 2649 "mac=de:ad:be:ef:12:34,host_mac=12:34:de:ad:be:ef,vhost_user=true,socket=/tmp/sock", 2650 ) 2651 .unwrap(); 2652 2653 assert!(matches!( 2654 vmm.vm_add_net(net_config.clone()), 2655 Err(VmError::VmNotCreated) 2656 )); 2657 2658 let _ = vmm.vm_create(create_dummy_vm_config()); 2659 assert!(vmm 2660 .vm_config 2661 .as_ref() 2662 .unwrap() 2663 .lock() 2664 .unwrap() 2665 .net 2666 .is_none()); 2667 2668 assert!(vmm.vm_add_net(net_config.clone()).unwrap().is_none()); 2669 assert_eq!( 2670 vmm.vm_config 2671 .as_ref() 2672 .unwrap() 2673 .lock() 2674 .unwrap() 2675 .net 2676 .clone() 2677 .unwrap() 2678 .len(), 2679 1 2680 ); 2681 assert_eq!( 2682 vmm.vm_config 2683 .as_ref() 2684 .unwrap() 2685 .lock() 2686 .unwrap() 2687 .net 2688 .clone() 2689 .unwrap()[0], 2690 net_config 2691 ); 2692 } 2693 2694 #[test] 2695 fn test_vmm_vm_cold_add_vdpa() { 2696 let mut vmm = create_dummy_vmm(); 2697 let vdpa_config = VdpaConfig::parse("path=/dev/vhost-vdpa,num_queues=2").unwrap(); 2698 2699 assert!(matches!( 2700 vmm.vm_add_vdpa(vdpa_config.clone()), 2701 Err(VmError::VmNotCreated) 2702 )); 2703 2704 let _ = vmm.vm_create(create_dummy_vm_config()); 2705 assert!(vmm 2706 .vm_config 2707 .as_ref() 2708 .unwrap() 2709 .lock() 2710 .unwrap() 2711 .vdpa 2712 .is_none()); 2713 2714 assert!(vmm.vm_add_vdpa(vdpa_config.clone()).unwrap().is_none()); 2715 assert_eq!( 2716 vmm.vm_config 2717 .as_ref() 2718 .unwrap() 2719 .lock() 2720 .unwrap() 2721 .vdpa 2722 .clone() 2723 .unwrap() 2724 .len(), 2725 1 2726 ); 2727 assert_eq!( 2728 vmm.vm_config 2729 .as_ref() 2730 .unwrap() 2731 .lock() 2732 .unwrap() 2733 .vdpa 2734 .clone() 2735 .unwrap()[0], 2736 vdpa_config 2737 ); 2738 } 2739 2740 #[test] 2741 fn test_vmm_vm_cold_add_vsock() { 2742 let mut vmm = create_dummy_vmm(); 2743 let vsock_config = VsockConfig::parse("socket=/tmp/sock,cid=3,iommu=on").unwrap(); 2744 2745 assert!(matches!( 2746 vmm.vm_add_vsock(vsock_config.clone()), 2747 Err(VmError::VmNotCreated) 2748 )); 2749 2750 let _ = vmm.vm_create(create_dummy_vm_config()); 2751 assert!(vmm 2752 .vm_config 2753 .as_ref() 2754 .unwrap() 2755 .lock() 2756 .unwrap() 2757 .vsock 2758 .is_none()); 2759 2760 assert!(vmm.vm_add_vsock(vsock_config.clone()).unwrap().is_none()); 2761 assert_eq!( 2762 vmm.vm_config 2763 .as_ref() 2764 .unwrap() 2765 .lock() 2766 .unwrap() 2767 .vsock 2768 .clone() 2769 .unwrap(), 2770 vsock_config 2771 ); 2772 } 2773 } 2774