1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "x86_64")] 45 use 
devices::debug_console::DebugConsole; 46 #[cfg(target_arch = "aarch64")] 47 use devices::gic; 48 #[cfg(target_arch = "x86_64")] 49 use devices::ioapic; 50 #[cfg(target_arch = "aarch64")] 51 use devices::legacy::Pl011; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use rate_limiter::group::RateLimiterGroup; 65 use seccompiler::SeccompAction; 66 use serde::{Deserialize, Serialize}; 67 use std::collections::{BTreeMap, BTreeSet, HashMap}; 68 use std::fs::{read_link, File, OpenOptions}; 69 use std::io::{self, stdout, Seek, SeekFrom}; 70 use std::mem::zeroed; 71 use std::num::Wrapping; 72 use std::os::unix::fs::OpenOptionsExt; 73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 74 use std::path::PathBuf; 75 use std::result; 76 use std::sync::{Arc, Mutex}; 77 use std::time::Instant; 78 use tracer::trace_scoped; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::VirtioTransport; 81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 82 use virtio_devices::vhost_user::VhostUserConfig; 83 use virtio_devices::{ 84 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 85 }; 86 use virtio_devices::{Endpoint, IommuMapping}; 87 use vm_allocator::{AddressAllocator, SystemAllocator}; 88 use vm_device::dma_mapping::vfio::VfioDmaMapping; 89 use vm_device::dma_mapping::ExternalDmaMapping; 90 use vm_device::interrupt::{ 91 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 92 }; 93 use vm_device::{Bus, BusDevice, Resource}; 94 use 
vm_memory::guest_memory::FileOffset; 95 use vm_memory::GuestMemoryRegion; 96 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 97 #[cfg(target_arch = "x86_64")] 98 use vm_memory::{GuestAddressSpace, GuestMemory}; 99 use vm_migration::{ 100 protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable, 101 MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 102 }; 103 use vm_virtio::AccessPlatform; 104 use vm_virtio::VirtioDeviceType; 105 use vmm_sys_util::eventfd::EventFd; 106 #[cfg(target_arch = "x86_64")] 107 use {devices::debug_console, devices::legacy::Serial}; 108 109 #[cfg(target_arch = "aarch64")] 110 const MMIO_LEN: u64 = 0x1000; 111 112 // Singleton devices / devices the user cannot name 113 #[cfg(target_arch = "x86_64")] 114 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 115 const SERIAL_DEVICE_NAME: &str = "__serial"; 116 #[cfg(target_arch = "x86_64")] 117 const DEBUGCON_DEVICE_NAME: &str = "__debug_console"; 118 #[cfg(target_arch = "aarch64")] 119 const GPIO_DEVICE_NAME: &str = "__gpio"; 120 const RNG_DEVICE_NAME: &str = "__rng"; 121 const IOMMU_DEVICE_NAME: &str = "__iommu"; 122 const BALLOON_DEVICE_NAME: &str = "__balloon"; 123 const CONSOLE_DEVICE_NAME: &str = "__console"; 124 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 125 126 // Devices that the user may name and for which we generate 127 // identifiers if the user doesn't give one 128 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 129 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 130 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 131 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 132 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 133 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 134 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 135 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 136 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 137 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 138 139 /// Errors associated with 
/// device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    // NOTE(review): doc comment previously said "console pty" (copy-paste);
    // this variant is for the debug console.
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),
}

/// Convenience alias: all DeviceManager operations report DeviceManagerError.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the platform MMIO window reserved for the DeviceManager's own
// ACPI region (allocated in DeviceManager::new()).
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// Pseudo-terminal ioctl request numbers (see ioctl_tty(2)):
// TIOCSPTLCK unlocks the pty main side, TIOCGTPEER opens the peer (sub) side.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Allocate a new pseudo-terminal pair.
///
/// Returns the main `File`, the peer (sub) `File`, and the filesystem path
/// of the peer as resolved through /proc/self/fd.
pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.
    // Open the pty main end non-blocking so reads/writes never stall the VMM.
    let custom_flags = libc::O_NONBLOCK;
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(custom_flags)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        // Fall back to the legacy /dev/ptmx node on any failure.
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(custom_flags)
            .open("/dev/ptmx")?,
    };
    // Unlock the pty (TIOCSPTLCK with 0) so the peer side can be opened.
    let mut unlock: libc::c_ulong = 0;
    // SAFETY: FFI call into libc, trivially safe
    unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };

    // SAFETY: FFI call into libc, trivially safe
    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER as _,
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    // Recover the peer's filesystem path from its /proc fd symlink.
    let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
    let path = read_link(proc_path)?;

    // SAFETY: sub_fd is checked to be valid before being wrapped in File
    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}

/// Thin wrapper around the optional virtio-console resizer handle.
#[derive(Default)]
pub struct Console {
    // Resizer installed when a virtio-console device is created; None otherwise.
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    // Returns true when a console resizer is attached.
    // NOTE(review): equivalent to `self.console_resizer.is_some()`.
    pub fn need_resize(&self) -> bool {
        if let Some(_resizer) = self.console_resizer.as_ref() {
            return true;
        }

        false
    }

    // Propagate the current terminal size to the guest console, if resizable.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

/// Owns the allocators and buses used to place devices in the guest
/// address space. Shared (via Arc) between the DeviceManager and the
/// PCI segments.
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
    // Per-PCI-segment MMIO window allocators (32-bit and 64-bit ranges).
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

// BAR relocation: free the old range, claim the new one, then patch the
// bus mappings, the device tree resources and any virtio ioeventfds /
// shared-memory regions so the device keeps working at its new address.
impl DeviceRelocation for AddressManager {
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator: release the old PIO range and
                    // reserve the exact new range requested by the guest.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        // A BAR belongs to exactly one segment allocator; stop
                        // once it has been moved.
                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    // Only the PciBar resource matching both the region type and
                    // the old base address is rewritten.
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // Extra fixups that only apply to virtio-pci devices.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The config BAR moved: re-register every queue-notify ioeventfd
                // at the new guest address.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared memory
                // regions: remap them through the hypervisor at the new base.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the old guest mapping first...
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record its new BAR base.
        pci_dev.move_bar(old_base, new_base)
    }
}

// Persistent part of the DeviceManager captured in a snapshot.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// Main side of a pty together with the path of its peer.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Duplicates the underlying fd; panics if the dup fails.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

// Handle to a PCI device, by passthrough/virtio flavor.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

// A virtio device plus the placement/IOMMU metadata needed to attach it.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Generic addresses of ACPI platform registers, filled in as the
/// corresponding devices are created.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub
sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // debug-console PTY
    debug_console_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status (pipe used to drive console resize handling)
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // One entry per configured PCI segment (segment 0 is the default one).
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit / reset events used to signal the VMM from device handlers.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // Maps (device type, name) to its MMIO placement, for the aarch64 FDT.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // Platform MMIO address reserved for the DeviceManager's ACPI region
    // (allocated in new() with DEVICE_MANAGER_ACPI_SIZE).
    acpi_address: GuestAddress,

    // Index of the currently selected PCI segment — presumably the target
    // for newly hotplugged devices; verify against callers (TODO confirm).
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g.
// ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this DeviceManager is being restored from, if any.
    snapshot: Option<Snapshot>,

    // Rate limiter groups, keyed by group id, shared between devices.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
}

impl DeviceManager {
    /// Builds the `DeviceManager`, wiring up address allocators, the MSI
    /// interrupt manager, PCI segments and rate-limiter groups, and registers
    /// the manager itself on the MMIO bus at its allocated ACPI address.
    ///
    /// When `snapshot` is `Some`, the device tree and device-id counter are
    /// restored from the snapshot state instead of starting empty.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, resume the device tree and id counter from the snapshot;
        // otherwise start with an empty tree and a zeroed (wrapping) counter.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        // Default to a single PCI segment unless the platform config says otherwise.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        // Splits [start, end] evenly into one MMIO allocator per PCI segment.
        let create_mmio_allocators = |start, end, num_pci_segments, alignment| {
            // Start each PCI segment mmio range on an aligned boundary
            let pci_segment_mmio_size =
                (end - start + 1) / (alignment * num_pci_segments as u64) * alignment;

            let mut mmio_allocators = vec![];
            for i in 0..num_pci_segments as u64 {
                let mmio_start = start + i * pci_segment_mmio_size;
                let allocator = Arc::new(Mutex::new(
                    AddressAllocator::new(GuestAddress(mmio_start), pci_segment_mmio_size).unwrap(),
                ));
                mmio_allocators.push(allocator)
            }

            mmio_allocators
        };

        // 32-bit device hole: 4 KiB aligned per-segment ranges.
        let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
        let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
        let pci_mmio32_allocators = create_mmio_allocators(
            start_of_mmio32_area,
            end_of_mmio32_area,
            num_pci_segments,
            4 << 10,
        );

        // 64-bit device area (above guest RAM): 4 GiB aligned per-segment ranges.
        let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
        let pci_mmio64_allocators = create_mmio_allocators(
            start_of_mmio64_area,
            end_of_mmio64_area,
            num_pci_segments,
            4 << 30,
        );

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio32_allocators,
            pci_mmio64_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // MMIO window through which the guest reaches the DeviceManager's ACPI
        // region (the manager is inserted on the MMIO bus at this address below).
        // NOTE(review): this is an MMIO allocation but maps failure to
        // `AllocateIoPort` (the `dynamic` branch below uses
        // `AllocateMmioAddress` for the same call) — confirm which variant is intended.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // One legacy interrupt slot per possible PCI device (32 slots).
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; further segments get their own
        // NUMA node mapping and per-segment MMIO allocators.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio32_allocators[0]),
            Arc::clone(&address_manager.pci_mmio64_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio32_allocators[i]),
                Arc::clone(&address_manager.pci_mmio64_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic device support, expose the CpuManager's ACPI region on
        // the MMIO bus as well (e.g. for CPU hotplug-style flows).
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        // Instantiate and start one rate limiter thread per configured group.
        let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
        if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
            for rate_limit_group_cfg in rate_limit_groups_cfg {
                let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
                let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
                let ops = rate_limit_cfg.ops.unwrap_or_default();
                let mut rate_limit_group = RateLimiterGroup::new(
                    &rate_limit_group_cfg.id,
                    bw.size,
                    bw.one_time_burst.unwrap_or(0),
                    bw.refill_time,
                    ops.size,
                    ops.one_time_burst.unwrap_or(0),
                    ops.refill_time,
                )
                .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;

                rate_limit_group.start_thread(exit_evt).unwrap();
                rate_limit_groups
                    .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
            }
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            debug_console_pty: None,
            console_resize_pipe: None,
            original_termios_opt: Arc::new(Mutex::new(None)),
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            pvpanic_device: None,
            force_iommu,
            io_uring_supported: None,
            aio_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
            rate_limit_groups,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself as a bus device at its ACPI address.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the serial device's PTY pair, if one was created.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns a clone of the virtio-console PTY pair, if one was created.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns a clone of the debug-console PTY pair, if one was created.
    pub fn debug_console_pty(&self) -> Option<PtyPair> {
        self.debug_console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns the pipe used to deliver console resize (SIGWINCH) events.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.clone()
    }

    /// Creates and registers all VM devices: interrupt controller, legacy and
    /// ACPI devices, consoles, virtio devices and the PCI topology. Optional
    /// pre-existing PTYs/pipes (e.g. carried over a restore) are reused.
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        debug_console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the MemoryManager's ACPI region on the MMIO bus, if it has one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        // Architecture-specific legacy devices: x86 needs the reset event,
        // aarch64 needs the legacy interrupt manager.
        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_devices(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
            debug_console_pty,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    // Snapshot of the device tree and the device-id counter, used when
    // serializing the DeviceManager state.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    // Returns the inclusive (start, end) guest-address range used for MSI
    // doorbells: derived from the vGIC config on aarch64, fixed on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    // Creates the optional virtio-iommu device, attaches all virtio/VFIO/
    // vfio-user devices to their PCI segments, and records which BDFs sit
    // behind the IOMMU.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only hand the IOMMU mapping to devices flagged for it.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // All 32 possible device slots on the segment's bus 0.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The virtio-iommu device itself always goes on segment 0 and is
            // never attached behind itself.
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Track the per-segment PCI config-space bus devices.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }

    // aarch64: creates the vGIC interrupt controller, restores its state when
    // resuming from a snapshot, and records it in the device tree.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                // +16 skips the PPI offset to get the absolute interrupt id.
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    // aarch64-only accessor for the vGIC created above.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    // x86_64: creates the userspace IOAPIC, maps it on the MMIO bus and
    // records it in the device tree (restoring its state on resume).
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    // Creates the ACPI shutdown device, the GED notification device and the
    // PM timer, recording the register addresses used to build ACPI tables.
    // Returns the GED device so callers can send ACPI notifications through it.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 0x8 ports are reserved in the allocator but only
            // 0x4 are inserted on the I/O bus below — confirm the extra range
            // is intentionally held in reserve.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset all share the same port.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    // x86_64 legacy platform devices: i8042 (reset), CMOS (memory size
    // reporting), fw debug port and the 0x80 POST/debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    // aarch64 legacy platform devices: RTC and GPIO, both MMIO-mapped and
    // recorded in id_to_dev_info for FDT generation.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    // Creates the x86 debug-console (debugcon-style) device on its configured
    // or default I/O port and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_debug_console_device(
        &mut self,
        debug_console_writer: Box<dyn io::Write + Send>,
    ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
        let id = String::from(DEBUGCON_DEVICE_NAME);
        let debug_console = Arc::new(Mutex::new(DebugConsole::new(
            id.clone(),
            debug_console_writer,
        )));

        // Use the configured iobase if present, otherwise the default port.
        let port = self
            .config
            .lock()
            .unwrap()
            .debug_console
            .clone()
            .iobase
            .map(|port| port as u64)
            .unwrap_or(debug_console::DEFAULT_PORT);

        self.bus_devices
            .push(Arc::clone(&debug_console) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(debug_console.clone(), port, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, debug_console));

        Ok(debug_console)
    }

    // x86_64: creates the 16550-style serial device at the conventional
    // COM1 location (port 0x3f8, IRQ 4), restoring state on resume.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    // aarch64: creates a PL011 UART on its MMIO window with a dynamically
    // allocated IRQ, records it for FDT generation, and advertises it via
    // an earlycon kernel command line addition.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    // Applies `f` to the termios settings of `fd` if it is a TTY, saving the
    // original settings the first time so they can be restored on shutdown.
    // Non-TTY fds are a no-op.
    fn modify_mode<F: FnOnce(&mut termios)>(
        &mut self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        // SAFETY: see above
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        // Only capture the original settings once (first caller wins).
        let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
        if original_termios_opt.is_none() {
            *original_termios_opt = Some(termios);
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    // Puts the given fd into raw mode (no echo/canonical processing) if it is a TTY.
    fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
2022 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 2023 } 2024 2025 fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> { 2026 let seccomp_filter = get_seccomp_filter( 2027 &self.seccomp_action, 2028 Thread::PtyForeground, 2029 self.hypervisor_type, 2030 ) 2031 .unwrap(); 2032 2033 self.console_resize_pipe = 2034 Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?)); 2035 2036 Ok(()) 2037 } 2038 2039 fn add_virtio_console_device( 2040 &mut self, 2041 virtio_devices: &mut Vec<MetaVirtioDevice>, 2042 console_pty: Option<PtyPair>, 2043 resize_pipe: Option<File>, 2044 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 2045 let console_config = self.config.lock().unwrap().console.clone(); 2046 let endpoint = match console_config.mode { 2047 ConsoleOutputMode::File => { 2048 let file = File::create(console_config.file.as_ref().unwrap()) 2049 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 2050 Endpoint::File(file) 2051 } 2052 ConsoleOutputMode::Pty => { 2053 if let Some(pty) = console_pty { 2054 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 2055 let file = pty.main.try_clone().unwrap(); 2056 self.console_pty = Some(Arc::new(Mutex::new(pty))); 2057 self.console_resize_pipe = resize_pipe.map(Arc::new); 2058 Endpoint::PtyPair(file.try_clone().unwrap(), file) 2059 } else { 2060 let (main, sub, path) = 2061 create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?; 2062 self.set_raw_mode(&sub) 2063 .map_err(DeviceManagerError::SetPtyRaw)?; 2064 self.config.lock().unwrap().console.file = Some(path.clone()); 2065 let file = main.try_clone().unwrap(); 2066 assert!(resize_pipe.is_none()); 2067 self.listen_for_sigwinch_on_tty(sub).unwrap(); 2068 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2069 Endpoint::PtyPair(file.try_clone().unwrap(), file) 2070 } 2071 } 2072 ConsoleOutputMode::Tty => { 2073 // Duplicating the file descriptors like this is needed as 
otherwise 2074 // they will be closed on a reboot and the numbers reused 2075 2076 // SAFETY: FFI call to dup. Trivially safe. 2077 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 2078 if stdout == -1 { 2079 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 2080 } 2081 // SAFETY: stdout is valid and owned solely by us. 2082 let stdout = unsafe { File::from_raw_fd(stdout) }; 2083 2084 // Make sure stdout is in raw mode, if it's a terminal. 2085 let _ = self.set_raw_mode(&stdout); 2086 2087 // SAFETY: FFI call. Trivially safe. 2088 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 { 2089 self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap()) 2090 .unwrap(); 2091 } 2092 2093 // If an interactive TTY then we can accept input 2094 // SAFETY: FFI call. Trivially safe. 2095 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 2096 // SAFETY: FFI call to dup. Trivially safe. 2097 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 2098 if stdin == -1 { 2099 return vmm_sys_util::errno::errno_result() 2100 .map_err(DeviceManagerError::DupFd); 2101 } 2102 // SAFETY: stdin is valid and owned solely by us. 
2103 let stdin = unsafe { File::from_raw_fd(stdin) }; 2104 2105 Endpoint::FilePair(stdout, stdin) 2106 } else { 2107 Endpoint::File(stdout) 2108 } 2109 } 2110 ConsoleOutputMode::Socket => { 2111 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 2112 } 2113 ConsoleOutputMode::Null => Endpoint::Null, 2114 ConsoleOutputMode::Off => return Ok(None), 2115 }; 2116 let id = String::from(CONSOLE_DEVICE_NAME); 2117 2118 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2119 id.clone(), 2120 endpoint, 2121 self.console_resize_pipe 2122 .as_ref() 2123 .map(|p| p.try_clone().unwrap()), 2124 self.force_iommu | console_config.iommu, 2125 self.seccomp_action.clone(), 2126 self.exit_evt 2127 .try_clone() 2128 .map_err(DeviceManagerError::EventFd)?, 2129 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2130 .map_err(DeviceManagerError::RestoreGetState)?, 2131 ) 2132 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2133 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2134 virtio_devices.push(MetaVirtioDevice { 2135 virtio_device: Arc::clone(&virtio_console_device) 2136 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2137 iommu: console_config.iommu, 2138 id: id.clone(), 2139 pci_segment: 0, 2140 dma_handler: None, 2141 }); 2142 2143 // Fill the device tree with a new node. In case of restore, we 2144 // know there is nothing to do, so we can simply override the 2145 // existing entry. 2146 self.device_tree 2147 .lock() 2148 .unwrap() 2149 .insert(id.clone(), device_node!(id, virtio_console_device)); 2150 2151 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2152 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2153 Some(console_resizer) 2154 } else { 2155 None 2156 }) 2157 } 2158 2159 /// Adds all devices that behave like a console with respect to the VM 2160 /// configuration. 
This includes:
    /// - debug-console (x86_64 only)
    /// - serial-console
    /// - virtio-console
    ///
    /// Pre-created PTY pairs may be passed in (e.g. on restore) so the same
    /// PTY paths are reused; otherwise new PTYs are created on demand.
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        #[cfg(target_arch = "x86_64")] debug_console_pty: Option<PtyPair>,
        #[cfg(not(target_arch = "x86_64"))] _: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        // Resolve the serial output sink from the configured mode. For Pty
        // mode the writer stays None because the SerialManager (below) owns
        // the PTY I/O.
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = serial_pty.clone() {
                    // Reuse the provided PTY and publish its path back into
                    // the config so clients can discover it.
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                None
            }
            ConsoleOutputMode::Tty => {
                let out = stdout();
                // Best-effort: ignore failure if stdout is not a terminal.
                let _ = self.set_raw_mode(&out);
                Some(Box::new(out))
            }
            ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // A SerialManager thread is only needed for interactive-ish
            // modes (Pty/Tty/Socket); file/null output needs no input pump.
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
                    let serial_manager = SerialManager::new(
                        serial,
                        self.serial_pty.clone(),
                        serial_config.mode,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        #[cfg(target_arch = "x86_64")]
        {
            // Same mode-resolution dance as the serial console above, but
            // for the x86_64-only debug console.
            let debug_console_config = self.config.lock().unwrap().debug_console.clone();
            let debug_console_writer: Option<Box<dyn io::Write + Send>> = match debug_console_config
                .mode
            {
                ConsoleOutputMode::File => Some(Box::new(
                    File::create(debug_console_config.file.as_ref().unwrap())
                        .map_err(DeviceManagerError::DebugconOutputFileOpen)?,
                )),
                ConsoleOutputMode::Pty => {
                    if let Some(pty) = debug_console_pty {
                        self.config.lock().unwrap().debug_console.file = Some(pty.path.clone());
                        self.debug_console_pty = Some(Arc::new(Mutex::new(pty)));
                    } else {
                        let (main, sub, path) =
                            create_pty().map_err(DeviceManagerError::DebugconPtyOpen)?;
                        self.set_raw_mode(&sub)
                            .map_err(DeviceManagerError::SetPtyRaw)?;
                        self.config.lock().unwrap().debug_console.file = Some(path.clone());
                        self.debug_console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    }
                    None
                }
                ConsoleOutputMode::Tty => {
                    let out = stdout();
                    let _ = self.set_raw_mode(&out);
                    Some(Box::new(out))
                }
                ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => {
                    None
                }
            };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer =
            self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates the TPM device backed by `tpm_path` and inserts it on the
    /// MMIO bus at the architecture-defined TPM address range.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every virtio device requested by the VM configuration,
    /// in a fixed order, and returns them for later PCI placement.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }
        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Builds one virtio-block device from `disk_cfg`, choosing between a
    /// vhost-user backend and a locally-opened disk image (raw/VHD/QCOW2/VHDX
    /// with sync, aio or io_uring I/O). Assigns an id into `disk_cfg` if the
    /// config did not carry one, and records the device in the device tree.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend: image format decides the driver, and
            // runtime probes (io_uring/aio) decide sync vs. async I/O.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            // Per-disk rate limiter config wins over a named shared group.
            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates one virtio-block device per configured disk, writing any
    /// generated ids back into the shared config.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Builds one virtio-net device from `net_cfg`: vhost-user, a named TAP
    /// interface, pre-opened TAP fds, or a freshly created TAP device.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = snapshot
                .map(|s| s.to_versioned_state())
                .transpose()
                .map_err(DeviceManagerError::RestoreGetState)?;

            // Three in-process backends: named TAP interface, pre-opened TAP
            // fds, or a TAP device created from ip/mask.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Creates the (single) virtio-rng device backed by the configured
    /// entropy source path, if that path is valid UTF-8.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Builds one vhost-user-fs (virtio-fs) device from `fs_cfg`; fails with
    /// `NoVirtioFsSock` when the socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Creates one virtio-fs device per configured filesystem, writing any
    /// generated ids back into the shared config.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Builds one virtio-pmem device from `pmem_cfg`, mapping the backing
    /// file (or an O_TMPFILE-backed anonymous file for directories) into
    /// guest memory at a 2MiB-aligned range.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
2896 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2897 info!("Restoring virtio-pmem {} resources", id); 2898 2899 let mut region_range: Option<(u64, u64)> = None; 2900 for resource in node.resources.iter() { 2901 match resource { 2902 Resource::MmioAddressRange { base, size } => { 2903 if region_range.is_some() { 2904 return Err(DeviceManagerError::ResourceAlreadyExists); 2905 } 2906 2907 region_range = Some((*base, *size)); 2908 } 2909 _ => { 2910 error!("Unexpected resource {:?} for {}", resource, id); 2911 } 2912 } 2913 } 2914 2915 if region_range.is_none() { 2916 return Err(DeviceManagerError::MissingVirtioPmemResources); 2917 } 2918 2919 region_range 2920 } else { 2921 None 2922 }; 2923 2924 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2925 if pmem_cfg.size.is_none() { 2926 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2927 } 2928 (O_TMPFILE, true) 2929 } else { 2930 (0, false) 2931 }; 2932 2933 let mut file = OpenOptions::new() 2934 .read(true) 2935 .write(!pmem_cfg.discard_writes) 2936 .custom_flags(custom_flags) 2937 .open(&pmem_cfg.file) 2938 .map_err(DeviceManagerError::PmemFileOpen)?; 2939 2940 let size = if let Some(size) = pmem_cfg.size { 2941 if set_len { 2942 file.set_len(size) 2943 .map_err(DeviceManagerError::PmemFileSetLen)?; 2944 } 2945 size 2946 } else { 2947 file.seek(SeekFrom::End(0)) 2948 .map_err(DeviceManagerError::PmemFileSetLen)? 2949 }; 2950 2951 if size % 0x20_0000 != 0 { 2952 return Err(DeviceManagerError::PmemSizeNotAligned); 2953 } 2954 2955 let (region_base, region_size) = if let Some((base, size)) = region_range { 2956 // The memory needs to be 2MiB aligned in order to support 2957 // hugepages. 
2958 self.pci_segments[pmem_cfg.pci_segment as usize] 2959 .mem64_allocator 2960 .lock() 2961 .unwrap() 2962 .allocate( 2963 Some(GuestAddress(base)), 2964 size as GuestUsize, 2965 Some(0x0020_0000), 2966 ) 2967 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2968 2969 (base, size) 2970 } else { 2971 // The memory needs to be 2MiB aligned in order to support 2972 // hugepages. 2973 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2974 .mem64_allocator 2975 .lock() 2976 .unwrap() 2977 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2978 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2979 2980 (base.raw_value(), size) 2981 }; 2982 2983 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2984 let mmap_region = MmapRegion::build( 2985 Some(FileOffset::new(cloned_file, 0)), 2986 region_size as usize, 2987 PROT_READ | PROT_WRITE, 2988 MAP_NORESERVE 2989 | if pmem_cfg.discard_writes { 2990 MAP_PRIVATE 2991 } else { 2992 MAP_SHARED 2993 }, 2994 ) 2995 .map_err(DeviceManagerError::NewMmapRegion)?; 2996 let host_addr: u64 = mmap_region.as_ptr() as u64; 2997 2998 let mem_slot = self 2999 .memory_manager 3000 .lock() 3001 .unwrap() 3002 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 3003 .map_err(DeviceManagerError::MemoryManager)?; 3004 3005 let mapping = virtio_devices::UserspaceMapping { 3006 host_addr, 3007 mem_slot, 3008 addr: GuestAddress(region_base), 3009 len: region_size, 3010 mergeable: false, 3011 }; 3012 3013 let virtio_pmem_device = Arc::new(Mutex::new( 3014 virtio_devices::Pmem::new( 3015 id.clone(), 3016 file, 3017 GuestAddress(region_base), 3018 mapping, 3019 mmap_region, 3020 self.force_iommu | pmem_cfg.iommu, 3021 self.seccomp_action.clone(), 3022 self.exit_evt 3023 .try_clone() 3024 .map_err(DeviceManagerError::EventFd)?, 3025 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3026 .map_err(DeviceManagerError::RestoreGetState)?, 3027 ) 3028 
.map_err(DeviceManagerError::CreateVirtioPmem)?, 3029 )); 3030 3031 // Update the device tree with correct resource information and with 3032 // the migratable device. 3033 node.resources.push(Resource::MmioAddressRange { 3034 base: region_base, 3035 size: region_size, 3036 }); 3037 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 3038 self.device_tree.lock().unwrap().insert(id.clone(), node); 3039 3040 Ok(MetaVirtioDevice { 3041 virtio_device: Arc::clone(&virtio_pmem_device) 3042 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3043 iommu: pmem_cfg.iommu, 3044 id, 3045 pci_segment: pmem_cfg.pci_segment, 3046 dma_handler: None, 3047 }) 3048 } 3049 3050 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3051 let mut devices = Vec::new(); 3052 // Add virtio-pmem if required 3053 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 3054 if let Some(pmem_list_cfg) = &mut pmem_devices { 3055 for pmem_cfg in pmem_list_cfg.iter_mut() { 3056 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 3057 } 3058 } 3059 self.config.lock().unwrap().pmem = pmem_devices; 3060 3061 Ok(devices) 3062 } 3063 3064 fn make_virtio_vsock_device( 3065 &mut self, 3066 vsock_cfg: &mut VsockConfig, 3067 ) -> DeviceManagerResult<MetaVirtioDevice> { 3068 let id = if let Some(id) = &vsock_cfg.id { 3069 id.clone() 3070 } else { 3071 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 3072 vsock_cfg.id = Some(id.clone()); 3073 id 3074 }; 3075 3076 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 3077 3078 let socket_path = vsock_cfg 3079 .socket 3080 .to_str() 3081 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 3082 let backend = 3083 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 3084 .map_err(DeviceManagerError::CreateVsockBackend)?; 3085 3086 let vsock_device = Arc::new(Mutex::new( 3087 virtio_devices::Vsock::new( 3088 id.clone(), 3089 vsock_cfg.cid, 
3090 vsock_cfg.socket.clone(), 3091 backend, 3092 self.force_iommu | vsock_cfg.iommu, 3093 self.seccomp_action.clone(), 3094 self.exit_evt 3095 .try_clone() 3096 .map_err(DeviceManagerError::EventFd)?, 3097 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3098 .map_err(DeviceManagerError::RestoreGetState)?, 3099 ) 3100 .map_err(DeviceManagerError::CreateVirtioVsock)?, 3101 )); 3102 3103 // Fill the device tree with a new node. In case of restore, we 3104 // know there is nothing to do, so we can simply override the 3105 // existing entry. 3106 self.device_tree 3107 .lock() 3108 .unwrap() 3109 .insert(id.clone(), device_node!(id, vsock_device)); 3110 3111 Ok(MetaVirtioDevice { 3112 virtio_device: Arc::clone(&vsock_device) 3113 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3114 iommu: vsock_cfg.iommu, 3115 id, 3116 pci_segment: vsock_cfg.pci_segment, 3117 dma_handler: None, 3118 }) 3119 } 3120 3121 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3122 let mut devices = Vec::new(); 3123 3124 let mut vsock = self.config.lock().unwrap().vsock.clone(); 3125 if let Some(ref mut vsock_cfg) = &mut vsock { 3126 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 3127 } 3128 self.config.lock().unwrap().vsock = vsock; 3129 3130 Ok(devices) 3131 } 3132 3133 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3134 let mut devices = Vec::new(); 3135 3136 let mm = self.memory_manager.clone(); 3137 let mut mm = mm.lock().unwrap(); 3138 for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { 3139 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() { 3140 info!("Creating virtio-mem device: id = {}", memory_zone_id); 3141 3142 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 3143 .map(|i| i as u16); 3144 3145 let virtio_mem_device = Arc::new(Mutex::new( 3146 virtio_devices::Mem::new( 3147 memory_zone_id.clone(), 3148 
virtio_mem_zone.region(), 3149 self.seccomp_action.clone(), 3150 node_id, 3151 virtio_mem_zone.hotplugged_size(), 3152 virtio_mem_zone.hugepages(), 3153 self.exit_evt 3154 .try_clone() 3155 .map_err(DeviceManagerError::EventFd)?, 3156 virtio_mem_zone.blocks_state().clone(), 3157 versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str()) 3158 .map_err(DeviceManagerError::RestoreGetState)?, 3159 ) 3160 .map_err(DeviceManagerError::CreateVirtioMem)?, 3161 )); 3162 3163 // Update the virtio-mem zone so that it has a handle onto the 3164 // virtio-mem device, which will be used for triggering a resize 3165 // if needed. 3166 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 3167 3168 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 3169 3170 devices.push(MetaVirtioDevice { 3171 virtio_device: Arc::clone(&virtio_mem_device) 3172 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3173 iommu: false, 3174 id: memory_zone_id.clone(), 3175 pci_segment: 0, 3176 dma_handler: None, 3177 }); 3178 3179 // Fill the device tree with a new node. In case of restore, we 3180 // know there is nothing to do, so we can simply override the 3181 // existing entry. 
3182 self.device_tree.lock().unwrap().insert( 3183 memory_zone_id.clone(), 3184 device_node!(memory_zone_id, virtio_mem_device), 3185 ); 3186 } 3187 } 3188 3189 Ok(devices) 3190 } 3191 3192 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3193 let mut devices = Vec::new(); 3194 3195 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 3196 let id = String::from(BALLOON_DEVICE_NAME); 3197 info!("Creating virtio-balloon device: id = {}", id); 3198 3199 let virtio_balloon_device = Arc::new(Mutex::new( 3200 virtio_devices::Balloon::new( 3201 id.clone(), 3202 balloon_config.size, 3203 balloon_config.deflate_on_oom, 3204 balloon_config.free_page_reporting, 3205 self.seccomp_action.clone(), 3206 self.exit_evt 3207 .try_clone() 3208 .map_err(DeviceManagerError::EventFd)?, 3209 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3210 .map_err(DeviceManagerError::RestoreGetState)?, 3211 ) 3212 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 3213 )); 3214 3215 self.balloon = Some(virtio_balloon_device.clone()); 3216 3217 devices.push(MetaVirtioDevice { 3218 virtio_device: Arc::clone(&virtio_balloon_device) 3219 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3220 iommu: false, 3221 id: id.clone(), 3222 pci_segment: 0, 3223 dma_handler: None, 3224 }); 3225 3226 self.device_tree 3227 .lock() 3228 .unwrap() 3229 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 3230 } 3231 3232 Ok(devices) 3233 } 3234 3235 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3236 let mut devices = Vec::new(); 3237 3238 if !self.config.lock().unwrap().watchdog { 3239 return Ok(devices); 3240 } 3241 3242 let id = String::from(WATCHDOG_DEVICE_NAME); 3243 info!("Creating virtio-watchdog device: id = {}", id); 3244 3245 let virtio_watchdog_device = Arc::new(Mutex::new( 3246 virtio_devices::Watchdog::new( 3247 id.clone(), 3248 self.reset_evt.try_clone().unwrap(), 3249 
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
    ));
    devices.push(MetaVirtioDevice {
        virtio_device: Arc::clone(&virtio_watchdog_device)
            as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu: false,
        id: id.clone(),
        pci_segment: 0,
        dma_handler: None,
    });

    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, virtio_watchdog_device));

    Ok(devices)
}

// Creates a single vDPA device from its config entry, assigning it a name
// if the config did not carry one, and wires up the dedicated DMA handler
// the device needs. The device is registered in the device tree.
fn make_vdpa_device(
    &mut self,
    vdpa_cfg: &mut VdpaConfig,
) -> DeviceManagerResult<MetaVirtioDevice> {
    // Reuse the configured id, or generate one and write it back into the
    // config so it is persisted for later operations.
    let id = if let Some(id) = &vdpa_cfg.id {
        id.clone()
    } else {
        let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
        vdpa_cfg.id = Some(id.clone());
        id
    };

    info!("Creating vDPA device: {:?}", vdpa_cfg);

    // The vDPA backend path must be valid UTF-8 to be handed down as &str.
    let device_path = vdpa_cfg
        .path
        .to_str()
        .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

    let vdpa_device = Arc::new(Mutex::new(
        virtio_devices::Vdpa::new(
            id.clone(),
            device_path,
            self.memory_manager.lock().unwrap().guest_memory(),
            vdpa_cfg.num_queues as u16,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVdpa)?,
    ));

    // Create the DMA handler that is required by the vDPA device
    let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
        Arc::clone(&vdpa_device),
        Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
    ));

    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, vdpa_device));

    Ok(MetaVirtioDevice {
        virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu: vdpa_cfg.iommu,
        id,
        pci_segment: vdpa_cfg.pci_segment,
        dma_handler: Some(vdpa_mapping),
    })
}

// Creates every vDPA device listed in the VM config. The config list is
// cloned, mutated (ids may be generated) and written back afterwards.
fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
    let mut devices = Vec::new();
    // Add vdpa if required
    let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
    if let Some(vdpa_list_cfg) = &mut vdpa_devices {
        for vdpa_cfg in vdpa_list_cfg.iter_mut() {
            devices.push(self.make_vdpa_device(vdpa_cfg)?);
        }
    }
    self.config.lock().unwrap().vdpa = vdpa_devices;

    Ok(devices)
}

// Returns the next unused device name of the form `<prefix><counter>`.
// The counter wraps (Wrapping) and the search gives up after a full cycle,
// so the error is only hit once every candidate name is taken.
fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
    let start_id = self.device_id_cnt;
    loop {
        // Generate the temporary name.
        let name = format!("{}{}", prefix, self.device_id_cnt);
        // Increment the counter.
        self.device_id_cnt += Wrapping(1);
        // Check if the name is already in use.
        if !self.boot_id_list.contains(&name)
            && !self.device_tree.lock().unwrap().contains_key(&name)
        {
            return Ok(name);
        }

        if self.device_id_cnt == start_id {
            // We went through a full loop and there's nothing else we can
            // do.
            break;
        }
    }
    Err(DeviceManagerError::NoAvailableDeviceName)
}

// Lazily creates the hypervisor passthrough device on first use, then
// delegates the actual VFIO device creation to add_vfio_device().
fn add_passthrough_device(
    &mut self,
    device_cfg: &mut DeviceConfig,
) -> DeviceManagerResult<(PciBdf, String)> {
    // If the passthrough device has not been created yet, it is created
    // here and stored in the DeviceManager structure for future needs.
    if self.passthrough_device.is_none() {
        self.passthrough_device = Some(
            self.address_manager
                .vm
                .create_passthrough_device()
                .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
        );
    }

    self.add_vfio_device(device_cfg)
}

// Builds a new VFIO container backed by a duplicate of the hypervisor
// passthrough device fd. Errors if passthrough was never set up.
fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
    let passthrough_device = self
        .passthrough_device
        .as_ref()
        .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

    // Duplicate the fd so the container owns its own handle.
    let dup = passthrough_device
        .try_clone()
        .map_err(DeviceManagerError::VfioCreate)?;

    Ok(Arc::new(
        VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
    ))
}

// Creates a VFIO PCI device from its config: picks/creates the VFIO
// container, performs guest memory DMA mappings when needed, allocates PCI
// resources and registers the device on its PCI segment and in the device
// tree. Returns the device's BDF and name.
fn add_vfio_device(
    &mut self,
    device_cfg: &mut DeviceConfig,
) -> DeviceManagerResult<(PciBdf, String)> {
    // Reuse the configured id or generate one and persist it back.
    let vfio_name = if let Some(id) = &device_cfg.id {
        id.clone()
    } else {
        let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
        device_cfg.id = Some(id.clone());
        id
    };

    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

    let mut needs_dma_mapping = false;

    // Here we create a new VFIO container for two reasons. Either this is
    // the first VFIO device, meaning we need a new VFIO container, which
    // will be shared with other VFIO devices. Or the new VFIO device is
    // attached to a vIOMMU, meaning we must create a dedicated VFIO
    // container. In the vIOMMU use case, we can't let all devices under
    // the same VFIO container since we couldn't map/unmap memory for each
    // device. That's simply because the map/unmap operations happen at the
    // VFIO container level.
    let vfio_container = if device_cfg.iommu {
        let vfio_container = self.create_vfio_container()?;

        let vfio_mapping = Arc::new(VfioDmaMapping::new(
            Arc::clone(&vfio_container),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        // A device behind the vIOMMU gets its DMA mapping driven by the
        // virtual IOMMU; requesting iommu without one is an error.
        if let Some(iommu) = &self.iommu_device {
            iommu
                .lock()
                .unwrap()
                .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
        } else {
            return Err(DeviceManagerError::MissingVirtualIommu);
        }

        vfio_container
    } else if let Some(vfio_container) = &self.vfio_container {
        // Reuse the shared container created by a previous VFIO device.
        Arc::clone(vfio_container)
    } else {
        // First non-iommu VFIO device: create the shared container and
        // remember to map guest memory into it below.
        let vfio_container = self.create_vfio_container()?;
        needs_dma_mapping = true;
        self.vfio_container = Some(Arc::clone(&vfio_container));

        vfio_container
    };

    let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
        .map_err(DeviceManagerError::VfioCreate)?;

    if needs_dma_mapping {
        // Register DMA mapping in IOMMU.
        // Do not register virtio-mem regions, as they are handled directly by
        // virtio-mem device itself.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_container
                    .vfio_dma_map(
                        region.start_addr().raw_value(),
                        region.len(),
                        region.as_ptr() as u64,
                    )
                    .map_err(DeviceManagerError::VfioDmaMap)?;
            }
        }

        let vfio_mapping = Arc::new(VfioDmaMapping::new(
            Arc::clone(&vfio_container),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        // Let virtio-mem devices drive map/unmap for their own regions
        // through this container-level handler.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Container,
                    vfio_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }
    }

    // INTx support: the legacy IRQ is looked up from the per-segment slot
    // table, indexed by the device number of the BDF.
    let legacy_interrupt_group =
        if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
            Some(
                legacy_interrupt_manager
                    .create_group(LegacyIrqGroupConfig {
                        irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                            [pci_device_bdf.device() as usize]
                            as InterruptIndex,
                    })
                    .map_err(DeviceManagerError::CreateInterruptGroup)?,
            )
        } else {
            None
        };

    let memory_manager = self.memory_manager.clone();

    let vfio_pci_device = VfioPciDevice::new(
        vfio_name.clone(),
        &self.address_manager.vm,
        vfio_device,
        vfio_container,
        self.msi_interrupt_manager.clone(),
        legacy_interrupt_group,
        device_cfg.iommu,
        pci_device_bdf,
        // Closure handing out KVM/hypervisor memory slots on demand.
        Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
    )
    .map_err(DeviceManagerError::VfioPciCreate)?;

    let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

    let new_resources = self.add_pci_device(
        vfio_pci_device.clone(),
        vfio_pci_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
3524 )?; 3525 3526 vfio_pci_device 3527 .lock() 3528 .unwrap() 3529 .map_mmio_regions() 3530 .map_err(DeviceManagerError::VfioMapRegion)?; 3531 3532 let mut node = device_node!(vfio_name, vfio_pci_device); 3533 3534 // Update the device tree with correct resource information. 3535 node.resources = new_resources; 3536 node.pci_bdf = Some(pci_device_bdf); 3537 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3538 3539 self.device_tree 3540 .lock() 3541 .unwrap() 3542 .insert(vfio_name.clone(), node); 3543 3544 Ok((pci_device_bdf, vfio_name)) 3545 } 3546 3547 fn add_pci_device( 3548 &mut self, 3549 bus_device: Arc<Mutex<dyn BusDevice>>, 3550 pci_device: Arc<Mutex<dyn PciDevice>>, 3551 segment_id: u16, 3552 bdf: PciBdf, 3553 resources: Option<Vec<Resource>>, 3554 ) -> DeviceManagerResult<Vec<Resource>> { 3555 let bars = pci_device 3556 .lock() 3557 .unwrap() 3558 .allocate_bars( 3559 &self.address_manager.allocator, 3560 &mut self.pci_segments[segment_id as usize] 3561 .mem32_allocator 3562 .lock() 3563 .unwrap(), 3564 &mut self.pci_segments[segment_id as usize] 3565 .mem64_allocator 3566 .lock() 3567 .unwrap(), 3568 resources, 3569 ) 3570 .map_err(DeviceManagerError::AllocateBars)?; 3571 3572 let mut pci_bus = self.pci_segments[segment_id as usize] 3573 .pci_bus 3574 .lock() 3575 .unwrap(); 3576 3577 pci_bus 3578 .add_device(bdf.device() as u32, pci_device) 3579 .map_err(DeviceManagerError::AddPciDevice)?; 3580 3581 self.bus_devices.push(Arc::clone(&bus_device)); 3582 3583 pci_bus 3584 .register_mapping( 3585 bus_device, 3586 #[cfg(target_arch = "x86_64")] 3587 self.address_manager.io_bus.as_ref(), 3588 self.address_manager.mmio_bus.as_ref(), 3589 bars.clone(), 3590 ) 3591 .map_err(DeviceManagerError::AddPciDevice)?; 3592 3593 let mut new_resources = Vec::new(); 3594 for bar in bars { 3595 new_resources.push(Resource::PciBar { 3596 index: bar.idx(), 3597 base: bar.addr(), 3598 size: bar.size(), 3599 type_: bar.region_type().into(), 3600 
prefetchable: bar.prefetchable().into(), 3601 }); 3602 } 3603 3604 Ok(new_resources) 3605 } 3606 3607 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3608 let mut iommu_attached_device_ids = Vec::new(); 3609 let mut devices = self.config.lock().unwrap().devices.clone(); 3610 3611 if let Some(device_list_cfg) = &mut devices { 3612 for device_cfg in device_list_cfg.iter_mut() { 3613 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3614 if device_cfg.iommu && self.iommu_device.is_some() { 3615 iommu_attached_device_ids.push(device_id); 3616 } 3617 } 3618 } 3619 3620 // Update the list of devices 3621 self.config.lock().unwrap().devices = devices; 3622 3623 Ok(iommu_attached_device_ids) 3624 } 3625 3626 fn add_vfio_user_device( 3627 &mut self, 3628 device_cfg: &mut UserDeviceConfig, 3629 ) -> DeviceManagerResult<(PciBdf, String)> { 3630 let vfio_user_name = if let Some(id) = &device_cfg.id { 3631 id.clone() 3632 } else { 3633 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3634 device_cfg.id = Some(id.clone()); 3635 id 3636 }; 3637 3638 let (pci_segment_id, pci_device_bdf, resources) = 3639 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3640 3641 let legacy_interrupt_group = 3642 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3643 Some( 3644 legacy_interrupt_manager 3645 .create_group(LegacyIrqGroupConfig { 3646 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3647 [pci_device_bdf.device() as usize] 3648 as InterruptIndex, 3649 }) 3650 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3651 ) 3652 } else { 3653 None 3654 }; 3655 3656 let client = Arc::new(Mutex::new( 3657 vfio_user::Client::new(&device_cfg.socket) 3658 .map_err(DeviceManagerError::VfioUserCreateClient)?, 3659 )); 3660 3661 let memory_manager = self.memory_manager.clone(); 3662 3663 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3664 vfio_user_name.clone(), 3665 &self.address_manager.vm, 
        client.clone(),
        self.msi_interrupt_manager.clone(),
        legacy_interrupt_group,
        pci_device_bdf,
        // Closure handing out hypervisor memory slots on demand.
        Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
    )
    .map_err(DeviceManagerError::VfioUserCreate)?;

    let memory = self.memory_manager.lock().unwrap().guest_memory();
    let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
    // Let virtio-mem devices drive map/unmap for their own regions via
    // this per-device DMA handler.
    for virtio_mem_device in self.virtio_mem_devices.iter() {
        virtio_mem_device
            .lock()
            .unwrap()
            .add_dma_mapping_handler(
                VirtioMemMappingSource::Device(pci_device_bdf.into()),
                vfio_user_mapping.clone(),
            )
            .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
    }

    // Map all current guest memory regions for this device.
    for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
        for region in zone.regions() {
            vfio_user_pci_device
                .dma_map(region)
                .map_err(DeviceManagerError::VfioUserDmaMap)?;
        }
    }

    let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

    let new_resources = self.add_pci_device(
        vfio_user_pci_device.clone(),
        vfio_user_pci_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    // Note it is required to call 'add_pci_device()' in advance to have the list of
    // mmio regions provisioned correctly
    vfio_user_pci_device
        .lock()
        .unwrap()
        .map_mmio_regions()
        .map_err(DeviceManagerError::VfioUserMapRegion)?;

    let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

    // Update the device tree with correct resource information.
    node.resources = new_resources;
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

    self.device_tree
        .lock()
        .unwrap()
        .insert(vfio_user_name.clone(), node);

    Ok((pci_device_bdf, vfio_user_name))
}

// Creates every vfio-user device listed in the VM config. The list is
// cloned, possibly gains generated ids, and is written back. Always
// returns an empty BDF list (no vIOMMU attachment path here).
fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
    let mut user_devices = self.config.lock().unwrap().user_devices.clone();

    if let Some(device_list_cfg) = &mut user_devices {
        for device_cfg in device_list_cfg.iter_mut() {
            let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
        }
    }

    // Update the list of devices
    self.config.lock().unwrap().user_devices = user_devices;

    Ok(vec![])
}

// Wraps a virtio device into a virtio-pci transport device: creates the
// "_virtio-pci-<id>" tree node (parent of the virtio node), allocates PCI
// resources, wires DMA/vIOMMU handling, registers ioeventfds and returns
// the new device's BDF.
fn add_virtio_pci_device(
    &mut self,
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    iommu_mapping: &Option<Arc<IommuMapping>>,
    virtio_device_id: String,
    pci_segment_id: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
) -> DeviceManagerResult<PciBdf> {
    let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

    // Add the new virtio-pci node to the device tree.
    let mut node = device_node!(id);
    node.children = vec![virtio_device_id.clone()];

    // On restore, pci_resources() may override the requested segment with
    // the one recorded in the snapshot (shadowed pci_segment_id).
    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&id, pci_segment_id)?;

    // Update the existing virtio node by setting the parent.
    if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
        node.parent = Some(id.clone());
    } else {
        return Err(DeviceManagerError::MissingNode);
    }

    // Allows support for one MSI-X vector per queue. It also adds 1
    // as we need to take into account the dedicated vector to notify
    // about a virtio config change.
    let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

    // Create the AccessPlatform trait from the implementation IommuMapping.
    // This will provide address translation for any virtio device sitting
    // behind a vIOMMU.
    let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
    {
        Some(Arc::new(AccessPlatformMapping::new(
            pci_device_bdf.into(),
            mapping.clone(),
        )))
    } else {
        None
    };

    let memory = self.memory_manager.lock().unwrap().guest_memory();

    // Map DMA ranges if a DMA handler is available and if the device is
    // not attached to a virtual IOMMU.
    if let Some(dma_handler) = &dma_handler {
        if iommu_mapping.is_some() {
            // Behind a vIOMMU: delegate mapping to the virtual IOMMU.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }
        } else {
            // Let every virtio-mem device handle the DMA map/unmap through the
            // DMA handler provided.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Device(pci_device_bdf.into()),
                        dma_handler.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }

            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem devices.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    // Identity mapping: IOVA == GPA.
                    let gpa = region.start_addr().0;
                    let size = region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }
    }

    let device_type = virtio_device.lock().unwrap().device_type();
    let virtio_pci_device = Arc::new(Mutex::new(
        VirtioPciDevice::new(
            id.clone(),
            memory,
            virtio_device,
            msix_num,
            access_platform,
            &self.msi_interrupt_manager,
            pci_device_bdf.into(),
            self.activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            // All device types *except* virtio block devices should be allocated a 64-bit bar
            // The block devices should be given a 32-bit BAR so that they are easily accessible
            // to firmware without requiring excessive identity mapping.
            // The exception being if not on the default PCI segment.
            pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
            dma_handler,
            self.pending_activations.clone(),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
        )
        .map_err(DeviceManagerError::VirtioDevice)?,
    ));

    let new_resources = self.add_pci_device(
        virtio_pci_device.clone(),
        virtio_pci_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    // Register one ioeventfd per queue notification address so guest
    // writes to the config BAR kick the device without a VM exit.
    let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
    for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
        let io_addr = IoEventAddress::Mmio(addr);
        self.address_manager
            .vm
            .register_ioevent(event, &io_addr, None)
            .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
    }

    // Update the device tree with correct resource information.
    node.resources = new_resources;
    node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
    self.device_tree.lock().unwrap().insert(id, node);

    Ok(pci_device_bdf)
}

// Creates the pvpanic PCI device on segment 0 and registers it on the bus
// and in the device tree. The node keeps no pci_device_handle.
fn add_pvpanic_device(
    &mut self,
) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
    let id = String::from(PVPANIC_DEVICE_NAME);
    // Requested segment; may be shadowed by the restored one below.
    let pci_segment_id = 0x0_u16;

    info!("Creating pvpanic device {}", id);

    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&id, pci_segment_id)?;

    let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

    let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
        .map_err(DeviceManagerError::PvPanicCreate)?;

    let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

    let new_resources = self.add_pci_device(
        pvpanic_device.clone(),
        pvpanic_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    let mut node = device_node!(id, pvpanic_device);

    node.resources = new_resources;
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = None;

    self.device_tree.lock().unwrap().insert(id, node);

    Ok(Some(pvpanic_device))
}

// Resolves the (segment, BDF, resources) triple for a device id: reuses
// the recorded values when the id exists in the device tree (restore
// path), otherwise allocates the next free BDF on the requested segment.
fn pci_resources(
    &self,
    id: &str,
    pci_segment_id: u16,
) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
    // Look for the id in the device tree. If it can be found, that means
    // the device is being restored, otherwise it's created from scratch.
    Ok(
        if let Some(node) = self.device_tree.lock().unwrap().get(id) {
            info!("Restoring virtio-pci {} resources", id);
            let pci_device_bdf: PciBdf = node
                .pci_bdf
                .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
            // The restored BDF decides the segment, overriding the caller's.
            let pci_segment_id = pci_device_bdf.segment();

            // Re-reserve the device id on the bus so it cannot be handed
            // out again.
            self.pci_segments[pci_segment_id as usize]
                .pci_bus
                .lock()
                .unwrap()
                .get_device_id(pci_device_bdf.device() as usize)
                .map_err(DeviceManagerError::GetPciDeviceId)?;

            (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
        } else {
            let pci_device_bdf =
                self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

            (pci_segment_id, pci_device_bdf, None)
        },
    )
}

// Accessor for the x86 port-IO bus.
#[cfg(target_arch = "x86_64")]
pub fn io_bus(&self) -> &Arc<Bus> {
    &self.address_manager.io_bus
}

// Accessor for the MMIO bus.
pub fn mmio_bus(&self) -> &Arc<Bus> {
    &self.address_manager.mmio_bus
}

// Accessor for the system address allocator.
pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
    &self.address_manager.allocator
}

// Returns the interrupt controller as a trait object, if one was created.
pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
    self.interrupt_controller
        .as_ref()
        .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
}

// Accessor for the PCI segments owned by this DeviceManager.
pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
    &self.pci_segments
}

// Accessor for the console.
pub fn console(&self) -> &Arc<Console> {
    &self.console
}

// Extra kernel command-line fragments collected while creating devices
// (aarch64 only).
#[cfg(target_arch = "aarch64")]
pub fn cmdline_additions(&self) -> &[String] {
    self.cmdline_additions.as_slice()
}

// Propagates a newly hotplugged guest memory region to every device that
// needs to know about it: virtio devices, their DMA handlers, the shared
// VFIO container and all vfio-user devices.
pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
    for handle in self.virtio_devices.iter() {
        handle
            .virtio_device
            .lock()
            .unwrap()
            .add_memory_region(new_region)
            .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

        // Devices behind the vIOMMU get their mappings via the IOMMU
        // instead, hence the !handle.iommu guard.
        if let Some(dma_handler) = &handle.dma_handler {
            if !handle.iommu {
                // Identity mapping: IOVA == GPA.
                let gpa = new_region.start_addr().0;
                let size = new_region.len();
                dma_handler
                    .map(gpa, gpa, size)
                    .map_err(DeviceManagerError::VirtioDmaMap)?;
            }
        }
    }

    // Take care of updating the memory for VFIO PCI devices.
    if let Some(vfio_container) = &self.vfio_container {
        vfio_container
            .vfio_dma_map(
                new_region.start_addr().raw_value(),
                new_region.len(),
                new_region.as_ptr() as u64,
            )
            .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
    }

    // Take care of updating the memory for vfio-user devices.
    {
        let device_tree = self.device_tree.lock().unwrap();
        for pci_device_node in device_tree.pci_devices() {
            if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                .pci_device_handle
                .as_ref()
                .ok_or(DeviceManagerError::MissingPciDevice)?
            {
                vfio_user_pci_device
                    .lock()
                    .unwrap()
                    .dma_map(new_region)
                    .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
            }
        }
    }

    Ok(())
}

// Drains and runs every pending virtio device activation.
pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
    for mut activator in self.pending_activations.lock().unwrap().drain(..) {
        activator
            .activate()
            .map_err(DeviceManagerError::VirtioActivate)?;
    }
    Ok(())
}

// Forwards a hotplug notification to the ACPI GED device.
// NOTE(review): unwrap() panics if no GED notification device exists —
// confirm callers only reach this on configurations that create one.
pub fn notify_hotplug(
    &self,
    _notification_type: AcpiNotificationFlags,
) -> DeviceManagerResult<()> {
    return self
        .ged_notification_device
        .as_ref()
        .unwrap()
        .lock()
        .unwrap()
        .notify(_notification_type)
        .map_err(DeviceManagerError::HotPlugNotification);
}

// Hotplugs a VFIO passthrough device and flags it as "up" in the PCIU
// bitmap so the guest picks it up on the next ACPI notification.
pub fn add_device(
    &mut self,
    device_cfg: &mut DeviceConfig,
) -> DeviceManagerResult<PciDeviceInfo> {
    self.validate_identifier(&device_cfg.id)?;

    if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
        return Err(DeviceManagerError::InvalidIommuHotplug);
    }

    let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

    // Update the PCIU bitmap
    self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

    Ok(PciDeviceInfo {
        id: device_name,
        bdf,
    })
}

// Hotplugs a vfio-user device and flags it as "up" in the PCIU bitmap.
pub fn add_user_device(
    &mut self,
    device_cfg: &mut UserDeviceConfig,
) -> DeviceManagerResult<PciDeviceInfo> {
    self.validate_identifier(&device_cfg.id)?;

    let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

    // Update the PCIU bitmap
    self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

    Ok(PciDeviceInfo {
        id: device_name,
        bdf,
    })
}

// Requests removal of a device by id: validates the removal is allowed and
// flags the device as "down" (PCID bitmap) for the guest to eject it.
pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
    // The node can be directly a PCI node in case the 'id' refers to a
    // VFIO device or a virtio-pci one.
    // In case the 'id' refers to a virtio device, we must find the PCI
    // node by looking at the parent.
4099 let device_tree = self.device_tree.lock().unwrap(); 4100 let node = device_tree 4101 .get(&id) 4102 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 4103 4104 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 4105 node 4106 } else { 4107 let parent = node 4108 .parent 4109 .as_ref() 4110 .ok_or(DeviceManagerError::MissingNode)?; 4111 device_tree 4112 .get(parent) 4113 .ok_or(DeviceManagerError::MissingNode)? 4114 }; 4115 4116 let pci_device_bdf: PciBdf = pci_device_node 4117 .pci_bdf 4118 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 4119 let pci_segment_id = pci_device_bdf.segment(); 4120 4121 let pci_device_handle = pci_device_node 4122 .pci_device_handle 4123 .as_ref() 4124 .ok_or(DeviceManagerError::MissingPciDevice)?; 4125 #[allow(irrefutable_let_patterns)] 4126 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 4127 let device_type = VirtioDeviceType::from( 4128 virtio_pci_device 4129 .lock() 4130 .unwrap() 4131 .virtio_device() 4132 .lock() 4133 .unwrap() 4134 .device_type(), 4135 ); 4136 match device_type { 4137 VirtioDeviceType::Net 4138 | VirtioDeviceType::Block 4139 | VirtioDeviceType::Pmem 4140 | VirtioDeviceType::Fs 4141 | VirtioDeviceType::Vsock => {} 4142 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4143 } 4144 } 4145 4146 // Update the PCID bitmap 4147 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4148 4149 Ok(()) 4150 } 4151 4152 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4153 info!( 4154 "Ejecting device_id = {} on segment_id={}", 4155 device_id, pci_segment_id 4156 ); 4157 4158 // Convert the device ID into the corresponding b/d/f. 4159 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4160 4161 // Give the PCI device ID back to the PCI bus. 
4162 self.pci_segments[pci_segment_id as usize] 4163 .pci_bus 4164 .lock() 4165 .unwrap() 4166 .put_device_id(device_id as usize) 4167 .map_err(DeviceManagerError::PutPciDeviceId)?; 4168 4169 // Remove the device from the device tree along with its children. 4170 let mut device_tree = self.device_tree.lock().unwrap(); 4171 let pci_device_node = device_tree 4172 .remove_node_by_pci_bdf(pci_device_bdf) 4173 .ok_or(DeviceManagerError::MissingPciDevice)?; 4174 4175 // For VFIO and vfio-user the PCI device id is the id. 4176 // For virtio we overwrite it later as we want the id of the 4177 // underlying device. 4178 let mut id = pci_device_node.id; 4179 let pci_device_handle = pci_device_node 4180 .pci_device_handle 4181 .ok_or(DeviceManagerError::MissingPciDevice)?; 4182 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4183 // The virtio-pci device has a single child 4184 if !pci_device_node.children.is_empty() { 4185 assert_eq!(pci_device_node.children.len(), 1); 4186 let child_id = &pci_device_node.children[0]; 4187 id = child_id.clone(); 4188 } 4189 } 4190 for child in pci_device_node.children.iter() { 4191 device_tree.remove(child); 4192 } 4193 4194 let mut iommu_attached = false; 4195 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4196 if iommu_attached_devices.contains(&pci_device_bdf) { 4197 iommu_attached = true; 4198 } 4199 } 4200 4201 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4202 // No need to remove any virtio-mem mapping here as the container outlives all devices 4203 PciDeviceHandle::Vfio(vfio_pci_device) => ( 4204 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4205 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4206 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4207 false, 4208 ), 4209 PciDeviceHandle::Virtio(virtio_pci_device) => { 4210 let dev = virtio_pci_device.lock().unwrap(); 4211 let bar_addr = dev.config_bar_addr(); 
4212 for (event, addr) in dev.ioeventfds(bar_addr) { 4213 let io_addr = IoEventAddress::Mmio(addr); 4214 self.address_manager 4215 .vm 4216 .unregister_ioevent(event, &io_addr) 4217 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4218 } 4219 4220 if let Some(dma_handler) = dev.dma_handler() { 4221 if !iommu_attached { 4222 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4223 for region in zone.regions() { 4224 let iova = region.start_addr().0; 4225 let size = region.len(); 4226 dma_handler 4227 .unmap(iova, size) 4228 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4229 } 4230 } 4231 } 4232 } 4233 4234 ( 4235 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4236 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4237 Some(dev.virtio_device()), 4238 dev.dma_handler().is_some() && !iommu_attached, 4239 ) 4240 } 4241 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4242 let mut dev = vfio_user_pci_device.lock().unwrap(); 4243 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4244 for region in zone.regions() { 4245 dev.dma_unmap(region) 4246 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4247 } 4248 } 4249 4250 ( 4251 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4252 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 4253 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4254 true, 4255 ) 4256 } 4257 }; 4258 4259 if remove_dma_handler { 4260 for virtio_mem_device in self.virtio_mem_devices.iter() { 4261 virtio_mem_device 4262 .lock() 4263 .unwrap() 4264 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4265 pci_device_bdf.into(), 4266 )) 4267 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4268 } 4269 } 4270 4271 // Free the allocated BARs 4272 pci_device 4273 .lock() 4274 .unwrap() 4275 .free_bars( 4276 &mut self.address_manager.allocator.lock().unwrap(), 4277 &mut self.pci_segments[pci_segment_id as 
usize] 4278 .mem32_allocator 4279 .lock() 4280 .unwrap(), 4281 &mut self.pci_segments[pci_segment_id as usize] 4282 .mem64_allocator 4283 .lock() 4284 .unwrap(), 4285 ) 4286 .map_err(DeviceManagerError::FreePciBars)?; 4287 4288 // Remove the device from the PCI bus 4289 self.pci_segments[pci_segment_id as usize] 4290 .pci_bus 4291 .lock() 4292 .unwrap() 4293 .remove_by_device(&pci_device) 4294 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4295 4296 #[cfg(target_arch = "x86_64")] 4297 // Remove the device from the IO bus 4298 self.io_bus() 4299 .remove_by_device(&bus_device) 4300 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4301 4302 // Remove the device from the MMIO bus 4303 self.mmio_bus() 4304 .remove_by_device(&bus_device) 4305 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4306 4307 // Remove the device from the list of BusDevice held by the 4308 // DeviceManager. 4309 self.bus_devices 4310 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 4311 4312 // Shutdown and remove the underlying virtio-device if present 4313 if let Some(virtio_device) = virtio_device { 4314 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 4315 self.memory_manager 4316 .lock() 4317 .unwrap() 4318 .remove_userspace_mapping( 4319 mapping.addr.raw_value(), 4320 mapping.len, 4321 mapping.host_addr, 4322 mapping.mergeable, 4323 mapping.mem_slot, 4324 ) 4325 .map_err(DeviceManagerError::MemoryManager)?; 4326 } 4327 4328 virtio_device.lock().unwrap().shutdown(); 4329 4330 self.virtio_devices 4331 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 4332 } 4333 4334 event!( 4335 "vm", 4336 "device-removed", 4337 "id", 4338 &id, 4339 "bdf", 4340 pci_device_bdf.to_string() 4341 ); 4342 4343 // At this point, the device has been removed from all the list and 4344 // buses where it was stored. At the end of this function, after 4345 // any_device, bus_device and pci_device are released, the actual 4346 // device will be dropped. 
        Ok(())
    }

    /// Plugs an already-created virtio device (wrapped in a
    /// `MetaVirtioDevice`) onto the PCI bus at runtime and reports the
    /// resulting id/BDF pair back to the caller.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only hand the device the IOMMU mapping if it was configured to sit
        // behind the virtual IOMMU.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true when `pci_segment_id` is listed in the platform
    /// config's `iommu_segments`, i.e. devices hotplugged on that segment
    /// may be placed behind the virtual IOMMU.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a virtio-block device described by `disk_cfg`.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        // iommu=on is only valid on a segment declared as an IOMMU segment.
        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-fs device described by `fs_cfg`.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-pmem device described by `pmem_cfg`.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        // iommu=on is only valid on a segment declared as an IOMMU segment.
        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        // iommu=on is only valid on a segment declared as an IOMMU segment.
        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        // iommu=on is only valid on a segment declared as an IOMMU segment.
        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        // iommu=on is only valid on a segment declared as an IOMMU segment.
        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects the counters of every virtio device that exposes some,
    /// keyed by device id.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Resizes the virtio-balloon device to `size` bytes, or returns
    /// `MissingVirtioBalloon` if no balloon device was set up.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) =
            &self.balloon
        {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the balloon's actual size in bytes, or 0 when no balloon
    /// device is present.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Returns a shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Forwards a power-button press to the guest through the ACPI GED
    /// notification device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Forwards a power-button press to the guest, covering both boot
    /// flows supported on AArch64 (see the comments below).
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// Returns the virtual IOMMU's BDF together with the BDFs of the
    /// devices attached to it, if a virtual IOMMU was created.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Rejects device ids that use the reserved double-underscore prefix
    /// or that already exist in the device tree. A `None` id is accepted.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// Accessor for the ACPI platform addresses collected by this manager.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Finds the id of the NUMA node whose `memory_zones` list contains
/// `memory_zone_id`, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Finds the id of the NUMA node owning `pci_segment_id`, defaulting to
/// node 0 when no node claims the segment.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type whose `Aml` impl emits the ACPI "TPM2" device object.
struct TpmDevice {}

impl Aml for TpmDevice {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                // _HID "MSFT0101" identifies a TPM 2.0 device.
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    // Fixed MMIO window for the TPM, taken from the
                    // arch-defined layout.
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    /// Emits the DeviceManager's ACPI AML: the PCI hotplug controller
    /// (PHPR), per-segment PCI devices, the motherboard resources device,
    /// the serial port (when enabled), sleep state, power button, optional
    /// TPM, and the GED notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build one PCNT method call per PCI segment; PSCN below invokes
        // them all to rescan every segment.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // One fixed MMIO config window per PCI segment, reserved through
        // the motherboard resources device below.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    /// Pauses every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    /// Resumes every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Snapshots the manager's own state, then aggregates the snapshot of
    /// every migratable device from the device tree.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    /// Enables dirty-page logging on every migratable device.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Disables dirty-page logging on every migratable device.
    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Merges the dirty-page tables of every migratable device into one.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Register layout of the PCI hotplug controller MMIO window: four 32-bit
// little-endian fields, matching the PCIU/PCID/B0EJ/PSEG AML Field entries
// declared in `impl Aml for DeviceManager`.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    /// Guest reads of the hotplug controller registers. PCIU/PCID are
    /// read-and-clear bitmaps of devices plugged/unplugged on the
    /// currently selected segment.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    /// Guest writes: B0EJ ejects every device whose slot bit is set on the
    /// selected segment; PSEG selects the segment targeted by subsequent
    /// accesses.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject each requested slot, clearing its bit as we go.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Ignore (but log) out-of-range segment selections.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Shut down every virtio device before the manager goes away.
        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the terminal settings saved before the console was
        // switched to raw mode, if any were recorded.
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}